summaryrefslogtreecommitdiff
path: root/sandbox
diff options
context:
space:
mode:
authorAlex Vakulenko <avakulenko@google.com>2016-01-22 16:52:43 -0800
committerAlex Vakulenko <avakulenko@google.com>2016-01-22 17:02:32 -0800
commitf6024733c0d1eed88f68520b5e6a20b96e212ad6 (patch)
treefed955593d9995a027a33cd46b41260882ddd3ea /sandbox
parent340b8dd38ca56de409ca4f790b45b1c314c544dd (diff)
downloadlibchrome-f6024733c0d1eed88f68520b5e6a20b96e212ad6.tar.gz
libchrome: Revert deleted files needed by Chrome OS
Some of the "unused" files were apparently needed for building libchrome on Chrome OS. Reverting deletion of these files. Change-Id: I02e32f112d16480206f43ca0087342a9de7f1e1b
Diffstat (limited to 'sandbox')
-rw-r--r--sandbox/linux/bpf_dsl/bpf_dsl.cc363
-rw-r--r--sandbox/linux/bpf_dsl/bpf_dsl.h317
-rw-r--r--sandbox/linux/bpf_dsl/bpf_dsl_unittest.cc486
-rw-r--r--sandbox/linux/bpf_dsl/codegen.cc159
-rw-r--r--sandbox/linux/bpf_dsl/codegen.h123
-rw-r--r--sandbox/linux/bpf_dsl/codegen_unittest.cc402
-rw-r--r--sandbox/linux/bpf_dsl/dump_bpf.cc109
-rw-r--r--sandbox/linux/bpf_dsl/dump_bpf.h18
-rw-r--r--sandbox/linux/bpf_dsl/policy.cc19
-rw-r--r--sandbox/linux/bpf_dsl/policy.h37
-rw-r--r--sandbox/linux/bpf_dsl/policy_compiler.cc499
-rw-r--r--sandbox/linux/bpf_dsl/policy_compiler.h159
-rw-r--r--sandbox/linux/bpf_dsl/syscall_set.cc144
-rw-r--r--sandbox/linux/bpf_dsl/syscall_set.h103
-rw-r--r--sandbox/linux/bpf_dsl/syscall_set_unittest.cc124
-rw-r--r--sandbox/linux/bpf_dsl/verifier.cc396
-rw-r--r--sandbox/linux/bpf_dsl/verifier.h57
-rw-r--r--sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc270
-rw-r--r--sandbox/linux/seccomp-bpf-helpers/baseline_policy.h48
-rw-r--r--sandbox/linux/seccomp-bpf-helpers/baseline_policy_unittest.cc334
-rw-r--r--sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.cc297
-rw-r--r--sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h82
-rw-r--r--sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.cc319
-rw-r--r--sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h100
-rw-r--r--sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions_unittests.cc282
-rw-r--r--sandbox/linux/seccomp-bpf-helpers/syscall_sets.cc1060
-rw-r--r--sandbox/linux/seccomp-bpf-helpers/syscall_sets.h112
-rw-r--r--sandbox/linux/seccomp-bpf/bpf_tests_unittest.cc153
-rw-r--r--sandbox/linux/seccomp-bpf/die.cc93
-rw-r--r--sandbox/linux/seccomp-bpf/die.h68
-rw-r--r--sandbox/linux/seccomp-bpf/sandbox_bpf.cc279
-rw-r--r--sandbox/linux/seccomp-bpf/sandbox_bpf.h118
-rw-r--r--sandbox/linux/seccomp-bpf/syscall.cc421
-rw-r--r--sandbox/linux/seccomp-bpf/syscall.h166
-rw-r--r--sandbox/linux/seccomp-bpf/syscall_unittest.cc240
-rw-r--r--sandbox/linux/seccomp-bpf/trap.cc390
-rw-r--r--sandbox/linux/seccomp-bpf/trap.h85
-rw-r--r--sandbox/linux/seccomp-bpf/trap_unittest.cc28
-rw-r--r--sandbox/linux/services/credentials.cc299
-rw-r--r--sandbox/linux/services/credentials.h104
-rw-r--r--sandbox/linux/services/credentials_unittest.cc242
-rw-r--r--sandbox/linux/services/init_process_reaper.cc101
-rw-r--r--sandbox/linux/services/init_process_reaper.h25
-rw-r--r--sandbox/linux/services/namespace_sandbox.cc208
-rw-r--r--sandbox/linux/services/namespace_sandbox.h101
-rw-r--r--sandbox/linux/services/namespace_sandbox_unittest.cc217
-rw-r--r--sandbox/linux/services/namespace_utils.cc117
-rw-r--r--sandbox/linux/services/namespace_utils.h53
-rw-r--r--sandbox/linux/services/namespace_utils_unittest.cc72
-rw-r--r--sandbox/linux/services/proc_util.cc119
-rw-r--r--sandbox/linux/services/proc_util.h42
-rw-r--r--sandbox/linux/services/proc_util_unittest.cc62
-rw-r--r--sandbox/linux/services/resource_limits.cc26
-rw-r--r--sandbox/linux/services/resource_limits.h29
-rw-r--r--sandbox/linux/services/resource_limits_unittests.cc43
-rw-r--r--sandbox/linux/services/scoped_process.cc119
-rw-r--r--sandbox/linux/services/scoped_process.h55
-rw-r--r--sandbox/linux/services/scoped_process_unittest.cc130
-rw-r--r--sandbox/linux/services/syscall_wrappers.cc246
-rw-r--r--sandbox/linux/services/syscall_wrappers.h83
-rw-r--r--sandbox/linux/services/syscall_wrappers_unittest.cc99
-rw-r--r--sandbox/linux/services/thread_helpers.cc157
-rw-r--r--sandbox/linux/services/thread_helpers.h43
-rw-r--r--sandbox/linux/services/thread_helpers_unittests.cc147
-rw-r--r--sandbox/linux/services/yama.cc115
-rw-r--r--sandbox/linux/services/yama.h57
-rw-r--r--sandbox/linux/services/yama_unittests.cc172
-rw-r--r--sandbox/linux/syscall_broker/broker_channel.cc35
-rw-r--r--sandbox/linux/syscall_broker/broker_channel.h31
-rw-r--r--sandbox/linux/syscall_broker/broker_client.cc144
-rw-r--r--sandbox/linux/syscall_broker/broker_client.h75
-rw-r--r--sandbox/linux/syscall_broker/broker_file_permission.cc243
-rw-r--r--sandbox/linux/syscall_broker/broker_file_permission.h119
-rw-r--r--sandbox/linux/syscall_broker/broker_file_permission_unittest.cc262
-rw-r--r--sandbox/linux/syscall_broker/broker_host.cc231
-rw-r--r--sandbox/linux/syscall_broker/broker_host.h41
-rw-r--r--sandbox/linux/syscall_broker/broker_policy.cc99
-rw-r--r--sandbox/linux/syscall_broker/broker_policy.h87
-rw-r--r--sandbox/linux/syscall_broker/broker_process.cc120
-rw-r--r--sandbox/linux/syscall_broker/broker_process.h94
-rw-r--r--sandbox/linux/syscall_broker/broker_process_unittest.cc656
81 files changed, 13980 insertions, 0 deletions
diff --git a/sandbox/linux/bpf_dsl/bpf_dsl.cc b/sandbox/linux/bpf_dsl/bpf_dsl.cc
new file mode 100644
index 0000000000..3a35903ec9
--- /dev/null
+++ b/sandbox/linux/bpf_dsl/bpf_dsl.cc
@@ -0,0 +1,363 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/bpf_dsl/bpf_dsl.h"
+
+#include <limits>
+
+#include "base/logging.h"
+#include "base/memory/ref_counted.h"
+#include "sandbox/linux/bpf_dsl/bpf_dsl_impl.h"
+#include "sandbox/linux/bpf_dsl/policy_compiler.h"
+#include "sandbox/linux/seccomp-bpf/die.h"
+#include "sandbox/linux/seccomp-bpf/errorcode.h"
+
+namespace sandbox {
+namespace bpf_dsl {
+namespace {
+
+// Trap handler used by Kill(): hands |aux|, reinterpreted as a C string, to
+// SANDBOX_DIE. The seccomp data argument is ignored.
+intptr_t BPFFailure(const arch_seccomp_data& /* data */, void* aux) {
+  SANDBOX_DIE(static_cast<char*>(aux));
+}
+
+// Result node created by Allow(); always compiles to ERR_ALLOWED.
+class AllowResultExprImpl : public internal::ResultExprImpl {
+ public:
+  AllowResultExprImpl() {}
+
+  // Identifies this node as an "allow" result.
+  bool IsAllow() const override { return true; }
+
+  // The compiler argument is not consulted; the result is a constant.
+  ErrorCode Compile(PolicyCompiler* /* pc */) const override {
+    return ErrorCode(ErrorCode::ERR_ALLOWED);
+  }
+
+ private:
+  ~AllowResultExprImpl() override {}
+
+  DISALLOW_COPY_AND_ASSIGN(AllowResultExprImpl);
+};
+
+class ErrorResultExprImpl : public internal::ResultExprImpl {
+ public:
+ explicit ErrorResultExprImpl(int err) : err_(err) {
+ CHECK(err_ >= ErrorCode::ERR_MIN_ERRNO && err_ <= ErrorCode::ERR_MAX_ERRNO);
+ }
+
+ ErrorCode Compile(PolicyCompiler* pc) const override {
+ return pc->Error(err_);
+ }
+
+ bool IsDeny() const override { return true; }
+
+ private:
+ ~ErrorResultExprImpl() override {}
+
+ int err_;
+
+ DISALLOW_COPY_AND_ASSIGN(ErrorResultExprImpl);
+};
+
+// Result node created by Trace(); compiles to ERR_TRACE combined with the
+// 16-bit auxiliary value |aux_|.
+class TraceResultExprImpl : public internal::ResultExprImpl {
+ public:
+  // Marked explicit so that a uint16_t never converts implicitly into a
+  // result node, matching the other single-argument constructors here.
+  explicit TraceResultExprImpl(uint16_t aux) : aux_(aux) {}
+
+  // The compiler argument is not consulted; the result depends only on |aux_|.
+  ErrorCode Compile(PolicyCompiler* pc) const override {
+    return ErrorCode(ErrorCode::ERR_TRACE + aux_);
+  }
+
+ private:
+  ~TraceResultExprImpl() override {}
+
+  uint16_t aux_;  // Value passed to Trace().
+
+  DISALLOW_COPY_AND_ASSIGN(TraceResultExprImpl);
+};
+
+// Result node created by Trap() and UnsafeTrap(); compiles to a trap that
+// invokes |func_| with |arg_|.
+class TrapResultExprImpl : public internal::ResultExprImpl {
+ public:
+  // |func| must be non-null (DCHECKed). |safe| is false for UnsafeTrap().
+  TrapResultExprImpl(TrapRegistry::TrapFnc func, const void* arg, bool safe)
+      : func_(func), arg_(arg), safe_(safe) {
+    DCHECK(func_);
+  }
+
+  // Identifies this node as a "deny" result.
+  bool IsDeny() const override { return true; }
+
+  // True exactly when this trap was created as unsafe.
+  bool HasUnsafeTraps() const override { return !safe_; }
+
+  // Asks the compiler for the ErrorCode of this handler/argument pair.
+  ErrorCode Compile(PolicyCompiler* pc) const override {
+    return pc->Trap(func_, arg_, safe_);
+  }
+
+ private:
+  ~TrapResultExprImpl() override {}
+
+  TrapRegistry::TrapFnc func_;
+  const void* arg_;
+  bool safe_;
+
+  DISALLOW_COPY_AND_ASSIGN(TrapResultExprImpl);
+};
+
+// Result node for "cond ? then_result : else_result"; Elser::Else() chains
+// these together.
+class IfThenResultExprImpl : public internal::ResultExprImpl {
+ public:
+  IfThenResultExprImpl(const BoolExpr& cond,
+                       const ResultExpr& then_result,
+                       const ResultExpr& else_result)
+      : cond_(cond), then_result_(then_result), else_result_(else_result) {}
+
+  // Unsafe as soon as either branch contains an unsafe trap.
+  bool HasUnsafeTraps() const override {
+    if (then_result_->HasUnsafeTraps())
+      return true;
+    return else_result_->HasUnsafeTraps();
+  }
+
+  // Compiles both branches and lets the condition select between them.
+  ErrorCode Compile(PolicyCompiler* pc) const override {
+    return cond_->Compile(
+        pc, then_result_->Compile(pc), else_result_->Compile(pc));
+  }
+
+ private:
+  ~IfThenResultExprImpl() override {}
+
+  BoolExpr cond_;
+  ResultExpr then_result_;
+  ResultExpr else_result_;
+
+  DISALLOW_COPY_AND_ASSIGN(IfThenResultExprImpl);
+};
+
+// Boolean node created by BoolConst(); its outcome is fixed when it is built.
+class ConstBoolExprImpl : public internal::BoolExprImpl {
+ public:
+  // Marked explicit so that a plain bool never converts implicitly into an
+  // expression node, matching the other single-argument constructors here.
+  explicit ConstBoolExprImpl(bool value) : value_(value) {}
+
+  // Returns |true_ec| when the constant is true and |false_ec| otherwise. The
+  // compiler argument is not consulted.
+  ErrorCode Compile(PolicyCompiler* pc,
+                    ErrorCode true_ec,
+                    ErrorCode false_ec) const override {
+    return value_ ? true_ec : false_ec;
+  }
+
+ private:
+  ~ConstBoolExprImpl() override {}
+
+  bool value_;
+
+  DISALLOW_COPY_AND_ASSIGN(ConstBoolExprImpl);
+};
+
+// Boolean node created by internal::ArgEq(); carries the argument index,
+// argument width, mask and value that are handed to CondMaskedEqual().
+class PrimitiveBoolExprImpl : public internal::BoolExprImpl {
+ public:
+  PrimitiveBoolExprImpl(int argno,
+                        ErrorCode::ArgType is_32bit,
+                        uint64_t mask,
+                        uint64_t value)
+      : argno_(argno), is_32bit_(is_32bit), mask_(mask), value_(value) {}
+
+  // Delegates to the compiler, forwarding the stored comparison parameters
+  // along with the two possible outcomes.
+  ErrorCode Compile(PolicyCompiler* pc,
+                    ErrorCode true_ec,
+                    ErrorCode false_ec) const override {
+    return pc->CondMaskedEqual(argno_, is_32bit_, mask_, value_,
+                               true_ec, false_ec);
+  }
+
+ private:
+  ~PrimitiveBoolExprImpl() override {}
+
+  int argno_;
+  ErrorCode::ArgType is_32bit_;  // TP_32BIT or TP_64BIT, chosen by ArgEq().
+  uint64_t mask_;
+  uint64_t value_;
+
+  DISALLOW_COPY_AND_ASSIGN(PrimitiveBoolExprImpl);
+};
+
+// Boolean node created by operator!; negates |cond_|.
+class NegateBoolExprImpl : public internal::BoolExprImpl {
+ public:
+  explicit NegateBoolExprImpl(const BoolExpr& cond) : cond_(cond) {}
+
+  // Negation is implemented by compiling the operand with the two outcomes
+  // exchanged.
+  ErrorCode Compile(PolicyCompiler* pc,
+                    ErrorCode true_ec,
+                    ErrorCode false_ec) const override {
+    return cond_->Compile(pc, /* true_ec= */ false_ec, /* false_ec= */ true_ec);
+  }
+
+ private:
+  ~NegateBoolExprImpl() override {}
+
+  BoolExpr cond_;
+
+  DISALLOW_COPY_AND_ASSIGN(NegateBoolExprImpl);
+};
+
+// Boolean node created by operator&&.
+class AndBoolExprImpl : public internal::BoolExprImpl {
+ public:
+  AndBoolExprImpl(const BoolExpr& lhs, const BoolExpr& rhs)
+      : lhs_(lhs), rhs_(rhs) {}
+
+  // lhs && rhs: when lhs holds, the outcome is decided by rhs; otherwise the
+  // outcome is |false_ec|.
+  ErrorCode Compile(PolicyCompiler* pc,
+                    ErrorCode true_ec,
+                    ErrorCode false_ec) const override {
+    const ErrorCode rhs_ec = rhs_->Compile(pc, true_ec, false_ec);
+    return lhs_->Compile(pc, rhs_ec, false_ec);
+  }
+
+ private:
+  ~AndBoolExprImpl() override {}
+
+  BoolExpr lhs_;
+  BoolExpr rhs_;
+
+  DISALLOW_COPY_AND_ASSIGN(AndBoolExprImpl);
+};
+
+// Boolean node created by operator||.
+class OrBoolExprImpl : public internal::BoolExprImpl {
+ public:
+  OrBoolExprImpl(const BoolExpr& lhs, const BoolExpr& rhs)
+      : lhs_(lhs), rhs_(rhs) {}
+
+  // lhs || rhs: when lhs holds, the outcome is |true_ec|; otherwise the
+  // outcome is decided by rhs.
+  ErrorCode Compile(PolicyCompiler* pc,
+                    ErrorCode true_ec,
+                    ErrorCode false_ec) const override {
+    const ErrorCode rhs_ec = rhs_->Compile(pc, true_ec, false_ec);
+    return lhs_->Compile(pc, true_ec, rhs_ec);
+  }
+
+ private:
+  ~OrBoolExprImpl() override {}
+
+  BoolExpr lhs_;
+  BoolExpr rhs_;
+
+  DISALLOW_COPY_AND_ASSIGN(OrBoolExprImpl);
+};
+
+} // namespace
+
+namespace internal {
+
+// Default answers for the ResultExprImpl predicates. Each one is false unless
+// a concrete result node overrides it.
+
+// By default a result contains no unsafe traps.
+bool ResultExprImpl::HasUnsafeTraps() const {
+  return false;
+}
+
+// By default a result is not an "allow".
+bool ResultExprImpl::IsAllow() const {
+  return false;
+}
+
+// By default a result is not a "deny".
+bool ResultExprImpl::IsDeny() const {
+  return false;
+}
+
+// Returns an all-ones mask as wide as an argument of |size| bytes. Only sizes
+// 4 and 8 are implemented; any other size hits the CHECK below.
+uint64_t DefaultMask(size_t size) {
+  if (size == 4)
+    return std::numeric_limits<uint32_t>::max();
+  if (size == 8)
+    return std::numeric_limits<uint64_t>::max();
+
+  CHECK(false) << "Unimplemented DefaultMask case";
+  return 0;
+}
+
+// Builds the primitive comparison "(argument |num| & |mask|) == |val|" for an
+// argument that is |size| bytes wide. |size| must be 4 or 8 (CHECKed).
+BoolExpr ArgEq(int num, size_t size, uint64_t mask, uint64_t val) {
+  CHECK(size == 4 || size == 8);
+
+  // TODO(mdempsky): Should we just always use TP_64BIT?
+  ErrorCode::ArgType arg_type = ErrorCode::TP_64BIT;
+  if (size == 4)
+    arg_type = ErrorCode::TP_32BIT;
+
+  return BoolExpr(new const PrimitiveBoolExprImpl(num, arg_type, mask, val));
+}
+
+} // namespace internal
+
+// Factory functions for the leaf result expressions. Each one wraps a newly
+// allocated, immutable implementation node in a ResultExpr.
+
+ResultExpr Allow() {
+  return ResultExpr(new const AllowResultExprImpl());
+}
+
+ResultExpr Error(int err) {
+  return ResultExpr(new const ErrorResultExprImpl(err));
+}
+
+// Kill is a safe trap whose handler is BPFFailure and whose argument is |msg|.
+ResultExpr Kill(const char* msg) {
+  return Trap(BPFFailure, msg);
+}
+
+ResultExpr Trace(uint16_t aux) {
+  return ResultExpr(new const TraceResultExprImpl(aux));
+}
+
+ResultExpr Trap(TrapRegistry::TrapFnc trap_func, const void* aux) {
+  const bool kSafe = true;
+  return ResultExpr(new const TrapResultExprImpl(trap_func, aux, kSafe));
+}
+
+ResultExpr UnsafeTrap(TrapRegistry::TrapFnc trap_func, const void* aux) {
+  const bool kSafe = false;
+  return ResultExpr(new const TrapResultExprImpl(trap_func, aux, kSafe));
+}
+
+// Factory function and operators for boolean expressions. Each one wraps a
+// newly allocated, immutable implementation node in a BoolExpr.
+
+BoolExpr BoolConst(bool value) {
+  return BoolExpr(new const ConstBoolExprImpl(value));
+}
+
+// Logical negation of |cond|.
+BoolExpr operator!(const BoolExpr& cond) {
+  return BoolExpr(new const NegateBoolExprImpl(cond));
+}
+
+// Logical conjunction of |lhs| and |rhs|.
+BoolExpr operator&&(const BoolExpr& lhs, const BoolExpr& rhs) {
+  return BoolExpr(new const AndBoolExprImpl(lhs, rhs));
+}
+
+// Logical disjunction of |lhs| and |rhs|.
+BoolExpr operator||(const BoolExpr& lhs, const BoolExpr& rhs) {
+  return BoolExpr(new const OrBoolExprImpl(lhs, rhs));
+}
+
+// Starts an if/then/else chain: an Elser with no clauses, extended by the
+// first (cond, then_result) clause.
+Elser If(const BoolExpr& cond, const ResultExpr& then_result) {
+  const Elser no_clauses(nullptr);
+  return no_clauses.ElseIf(cond, then_result);
+}
+
+// Wraps an existing clause list.
+Elser::Elser(cons::List<Clause> clause_list) : clause_list_(clause_list) {}
+
+// Copies share the other Elser's clause list.
+Elser::Elser(const Elser& elser) : clause_list_(elser.clause_list_) {}
+
+Elser::~Elser() {}
+
+// Returns a new Elser whose list has (cond, then_result) prepended; |this| is
+// left unmodified.
+Elser Elser::ElseIf(const BoolExpr& cond, const ResultExpr& then_result) const {
+  const Clause new_clause = std::make_pair(cond, then_result);
+  return Elser(Cons(new_clause, clause_list_));
+}
+
+// Terminates the chain and folds the accumulated clauses into one result
+// expression tree.
+ResultExpr Elser::Else(const ResultExpr& else_result) const {
+  // ElseIf() prepends, so the clause list is stored in reverse order: the
+  // lowest priority clause comes first. For
+  //
+  //   If(b1, e1).ElseIf(b2, e2).ElseIf(b3, e3).Else(e4)
+  //
+  // the list is
+  //
+  //   [(b3, e3), (b2, e2), (b1, e1)].
+  //
+  // Walking it front to back and wrapping the running expression each time
+  // gives
+  //
+  //   chain = e4
+  //   chain = (b3 ? e3 : chain) = (b3 ? e3 : e4)
+  //   chain = (b2 ? e2 : chain) = (b2 ? e2 : (b3 ? e3 : e4))
+  //   chain = (b1 ? e1 : chain) = (b1 ? e1 : (b2 ? e2 : (b3 ? e3 : e4)))
+  //
+  // so the first clause written ends up as the outermost test.
+
+  ResultExpr chain = else_result;
+  for (const Clause& pending : clause_list_) {
+    const BoolExpr& cond = pending.first;
+    const ResultExpr& then_result = pending.second;
+    chain = ResultExpr(new const IfThenResultExprImpl(cond, then_result, chain));
+  }
+  return chain;
+}
+
+} // namespace bpf_dsl
+} // namespace sandbox
+
+template class scoped_refptr<const sandbox::bpf_dsl::internal::BoolExprImpl>;
+template class scoped_refptr<const sandbox::bpf_dsl::internal::ResultExprImpl>;
diff --git a/sandbox/linux/bpf_dsl/bpf_dsl.h b/sandbox/linux/bpf_dsl/bpf_dsl.h
new file mode 100644
index 0000000000..365e9b5466
--- /dev/null
+++ b/sandbox/linux/bpf_dsl/bpf_dsl.h
@@ -0,0 +1,317 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_BPF_DSL_BPF_DSL_H_
+#define SANDBOX_LINUX_BPF_DSL_BPF_DSL_H_
+
+#include <stdint.h>
+
+#include <utility>
+#include <vector>
+
+#include "base/macros.h"
+#include "base/memory/ref_counted.h"
+#include "sandbox/linux/bpf_dsl/bpf_dsl_forward.h"
+#include "sandbox/linux/bpf_dsl/cons.h"
+#include "sandbox/linux/bpf_dsl/trap_registry.h"
+#include "sandbox/sandbox_export.h"
+
+// The sandbox::bpf_dsl namespace provides a domain-specific language
+// to make writing BPF policies more expressive. In general, the
+// object types all have value semantics (i.e., they can be copied
+// around, returned from or passed to function calls, etc. without any
+// surprising side effects), though not all support assignment.
+//
+// An idiomatic and demonstrative (albeit silly) example of this API
+// would be:
+//
+// #include "sandbox/linux/bpf_dsl/bpf_dsl.h"
+//
+// using namespace sandbox::bpf_dsl;
+//
+// class SillyPolicy : public Policy {
+// public:
+// SillyPolicy() {}
+// ~SillyPolicy() override {}
+// ResultExpr EvaluateSyscall(int sysno) const override {
+// if (sysno == __NR_fcntl) {
+// Arg<int> fd(0), cmd(1);
+// Arg<unsigned long> flags(2);
+// const uint64_t kGoodFlags = O_ACCMODE | O_NONBLOCK;
+// return If(fd == 0 && cmd == F_SETFL && (flags & ~kGoodFlags) == 0,
+// Allow())
+// .ElseIf(cmd == F_DUPFD || cmd == F_DUPFD_CLOEXEC,
+// Error(EMFILE))
+// .Else(Trap(SetFlagHandler, NULL));
+// } else {
+// return Allow();
+// }
+// }
+//
+// private:
+// DISALLOW_COPY_AND_ASSIGN(SillyPolicy);
+// };
+//
+// More generally, the DSL currently supports the following grammar:
+//
+// result = Allow() | Error(errno) | Kill(msg) | Trace(aux)
+// | Trap(trap_func, aux) | UnsafeTrap(trap_func, aux)
+// | If(bool, result)[.ElseIf(bool, result)].Else(result)
+// | Switch(arg)[.Case(val, result)].Default(result)
+// bool = BoolConst(boolean) | !bool | bool && bool | bool || bool
+// | arg == val | arg != val
+// arg = Arg<T>(num) | arg & mask
+//
+// The semantics of each function and operator are intended to be
+// intuitive, but are described in more detail below.
+//
+// (Credit to Sean Parent's "Inheritance is the Base Class of Evil"
+// talk at Going Native 2013 for promoting value semantics via shared
+// pointers to immutable state.)
+
+namespace sandbox {
+namespace bpf_dsl {
+
+// ResultExpr is an opaque reference to an immutable result expression tree.
+typedef scoped_refptr<const internal::ResultExprImpl> ResultExpr;
+
+// BoolExpr is an opaque reference to an immutable boolean expression tree.
+typedef scoped_refptr<const internal::BoolExprImpl> BoolExpr;
+
+// Allow specifies a result that the system call should be allowed to
+// execute normally.
+SANDBOX_EXPORT ResultExpr Allow();
+
+// Error specifies a result that the system call should fail with
+// error number |err|. As a special case, Error(0) will result in the
+// system call appearing to have succeeded, but without having any
+// side effects.
+SANDBOX_EXPORT ResultExpr Error(int err);
+
+// Kill specifies a result to kill the program and print an error message.
+SANDBOX_EXPORT ResultExpr Kill(const char* msg);
+
+// Trace specifies a result to notify a tracing process via the
+// PTRACE_EVENT_SECCOMP event and allow it to change or skip the system call.
+// The value of |aux| will be available to the tracer via PTRACE_GETEVENTMSG.
+SANDBOX_EXPORT ResultExpr Trace(uint16_t aux);
+
+// Trap specifies a result that the system call should be handled by
+// trapping back into userspace and invoking |trap_func|, passing
+// |aux| as the second parameter.
+SANDBOX_EXPORT ResultExpr
+ Trap(TrapRegistry::TrapFnc trap_func, const void* aux);
+
+// UnsafeTrap is like Trap, except the policy is marked as "unsafe"
+// and allowed to use SandboxSyscall to invoke any system call.
+//
+// NOTE: This feature, by definition, disables all security features of
+// the sandbox. It should never be used in production, but it can be
+// very useful to diagnose code that is incompatible with the sandbox.
+// If even a single system call returns "UnsafeTrap", the security of the
+// entire sandbox should be considered compromised.
+SANDBOX_EXPORT ResultExpr
+ UnsafeTrap(TrapRegistry::TrapFnc trap_func, const void* aux);
+
+// BoolConst converts a bool value into a BoolExpr.
+SANDBOX_EXPORT BoolExpr BoolConst(bool value);
+
+// Various ways to combine boolean expressions into more complex expressions.
+// They follow standard boolean algebra laws.
+SANDBOX_EXPORT BoolExpr operator!(const BoolExpr& cond);
+SANDBOX_EXPORT BoolExpr operator&&(const BoolExpr& lhs, const BoolExpr& rhs);
+SANDBOX_EXPORT BoolExpr operator||(const BoolExpr& lhs, const BoolExpr& rhs);
+
+// Arg<T> names one system call argument, interpreted as type |T|, together
+// with the bitmask that is applied to it before a comparison.
+template <typename T>
+class SANDBOX_EXPORT Arg {
+ public:
+  // Creates an Arg for the |num|th system call argument (indexed from 0),
+  // which is of type |T|.
+  explicit Arg(int num);
+
+  // Copying duplicates both the argument index and the mask.
+  Arg(const Arg& arg) : num_(arg.num_), mask_(arg.mask_) {}
+
+  // arg & mask: the same argument, with |rhs| and-ed into the current mask.
+  friend Arg operator&(const Arg& lhs, uint64_t rhs) {
+    const uint64_t combined_mask = lhs.mask_ & rhs;
+    return Arg(lhs.num_, combined_mask);
+  }
+
+  // arg == val: boolean expression testing whether the system call argument
+  // (after applying the mask) equals |rhs|.
+  friend BoolExpr operator==(const Arg& lhs, T rhs) { return lhs.EqualTo(rhs); }
+
+  // arg != val: the negation of the equality test.
+  friend BoolExpr operator!=(const Arg& lhs, T rhs) {
+    return !lhs.EqualTo(rhs);
+  }
+
+ private:
+  // Used by operator& to build an Arg with an explicit mask.
+  Arg(int num, uint64_t mask) : num_(num), mask_(mask) {}
+
+  // Helper behind operator==; defined out-of-line.
+  BoolExpr EqualTo(T val) const;
+
+  int num_;
+  uint64_t mask_;
+
+  DISALLOW_ASSIGN(Arg);
+};
+
+// If begins a conditional result expression predicated on the
+// specified boolean expression.
+SANDBOX_EXPORT Elser If(const BoolExpr& cond, const ResultExpr& then_result);
+
+// Elser accumulates the "if then" clauses of a conditional result expression
+// until Else() supplies the fallback and produces the final ResultExpr.
+class SANDBOX_EXPORT Elser {
+ public:
+  Elser(const Elser& elser);
+  ~Elser();
+
+  // Returns a copy of this chain extended with one more "if then" clause,
+  // predicated on |cond|.
+  Elser ElseIf(const BoolExpr& cond, const ResultExpr& then_result) const;
+
+  // Ends the chain, using |else_result| as the default fallback result
+  // expression.
+  ResultExpr Else(const ResultExpr& else_result) const;
+
+ private:
+  // One (condition, result) pair of the chain.
+  using Clause = std::pair<BoolExpr, ResultExpr>;
+
+  // Only If() and Switch() may start a chain.
+  explicit Elser(cons::List<Clause> clause_list);
+
+  cons::List<Clause> clause_list_;
+
+  friend Elser If(const BoolExpr&, const ResultExpr&);
+  template <typename T>
+  friend Caser<T> Switch(const Arg<T>&);
+  DISALLOW_ASSIGN(Elser);
+};
+
+// Switch begins a switch expression dispatched according to the
+// specified argument value.
+template <typename T>
+SANDBOX_EXPORT Caser<T> Switch(const Arg<T>& arg);
+
+// Caser<T> accumulates the "case" clauses of a Switch() over an Arg<T> until
+// Default() produces the final ResultExpr.
+template <typename T>
+class SANDBOX_EXPORT Caser {
+ public:
+  Caser(const Caser<T>& caser) : arg_(caser.arg_), elser_(caser.elser_) {}
+  ~Caser() {}
+
+  // Adds a "case" clause that matches the single value |value|.
+  Caser<T> Case(T value, ResultExpr result) const;
+
+  // Adds a "case" clause that matches any of |values|.
+  // See also the SANDBOX_BPF_DSL_CASES macro below for a more idiomatic way
+  // of using this function.
+  Caser<T> Cases(const std::vector<T>& values, ResultExpr result) const;
+
+  // Ends the switch with a "default" clause.
+  ResultExpr Default(ResultExpr result) const;
+
+ private:
+  // Only Switch() (a friend) and this class's own members may build a Caser
+  // from its parts.
+  Caser(const Arg<T>& arg, Elser elser) : arg_(arg), elser_(elser) {}
+
+  Arg<T> arg_;   // The argument being switched on.
+  Elser elser_;  // The clauses collected so far.
+
+  template <typename U>
+  friend Caser<U> Switch(const Arg<U>&);
+  DISALLOW_ASSIGN(Caser);
+};
+
+// Recommended usage is to put
+// #define CASES SANDBOX_BPF_DSL_CASES
+// near the top of the .cc file (e.g., nearby any "using" statements), then
+// use like:
+// Switch(arg).CASES((3, 5, 7), result)...;
+#define SANDBOX_BPF_DSL_CASES(values, result) \
+ Cases(SANDBOX_BPF_DSL_CASES_HELPER values, result)
+
+// Helper macro: builds a std::vector of |value|'s type from the listed values.
+// TODO(mdempsky): Convert to use C++11 initializer lists instead.
+#define SANDBOX_BPF_DSL_CASES_HELPER(value, ...) \
+ ({ \
+ const __typeof__(value) bpf_dsl_cases_values[] = {value, __VA_ARGS__}; \
+ std::vector<__typeof__(value)>( \
+ bpf_dsl_cases_values, \
+ bpf_dsl_cases_values + arraysize(bpf_dsl_cases_values)); \
+ })
+
+// =====================================================================
+// Official API ends here.
+// =====================================================================
+
+namespace internal {
+
+// Make argument-dependent lookup work. This is necessary because although
+// BoolExpr is defined in bpf_dsl, since it's merely a typedef for
+// scoped_refptr<const internal::BoolExprImpl>, argument-dependent lookup only
+// searches the "internal" nested namespace.
+using bpf_dsl::operator!;
+using bpf_dsl::operator||;
+using bpf_dsl::operator&&;
+
+// Returns a boolean expression that represents whether system call
+// argument |num| of size |size| is equal to |val|, when masked
+// according to |mask|. Users should use the Arg template class below
+// instead of using this API directly.
+SANDBOX_EXPORT BoolExpr
+ ArgEq(int num, size_t size, uint64_t mask, uint64_t val);
+
+// Returns the default mask for a system call argument of the specified size.
+SANDBOX_EXPORT uint64_t DefaultMask(size_t size);
+
+} // namespace internal
+
+// Public constructor: the mask starts as the default mask for sizeof(T).
+template <typename T>
+Arg<T>::Arg(int num)
+    : num_(num), mask_(internal::DefaultMask(sizeof(T))) {}
+
+// Definition requires ArgEq to have been declared. Moved out-of-line
+// to minimize how much internal clutter users have to ignore while
+// reading the header documentation.
+//
+// Additionally, we use this helper member function to avoid linker errors
+// caused by defining operator== out-of-line. For a more detailed explanation,
+// see http://www.parashift.com/c++-faq-lite/template-friends.html.
+template <typename T>
+BoolExpr Arg<T>::EqualTo(T val) const {
+  // ArgEq takes the comparison value as a uint64_t.
+  const uint64_t wide_val = static_cast<uint64_t>(val);
+  return internal::ArgEq(num_, sizeof(T), mask_, wide_val);
+}
+
+// Starts a switch over |arg| with no clauses collected yet.
+template <typename T>
+SANDBOX_EXPORT Caser<T> Switch(const Arg<T>& arg) {
+  const Elser no_clauses(nullptr);
+  return Caser<T>(arg, no_clauses);
+}
+
+// Adds a clause matching exactly |value|. This is Cases() with a one-element
+// value list; the vector is built directly so that this member does not
+// depend on the statement-expression based SANDBOX_BPF_DSL_CASES macro.
+template <typename T>
+Caser<T> Caser<T>::Case(T value, ResultExpr result) const {
+  // The explicit size_t count selects the (count, value) constructor even
+  // when |T| is itself an integer type.
+  const std::vector<T> single_value(static_cast<size_t>(1), value);
+  return Cases(single_value, result);
+}
+
+// Adds a clause that fires when the switched argument equals any of |values|.
+template <typename T>
+Caser<T> Caser<T>::Cases(const std::vector<T>& values,
+                         ResultExpr result) const {
+  // Theoretically we could evaluate arg_ just once and emit a more efficient
+  // dispatch table, but for now we simply translate into an equivalent
+  // If/ElseIf/Else chain.
+
+  // Start from "false" and OR in one equality test per value, so an empty
+  // |values| yields a clause that never matches.
+  BoolExpr any_match = BoolConst(false);
+  for (const T& candidate : values) {
+    any_match = any_match || (arg_ == candidate);
+  }
+  return Caser<T>(arg_, elser_.ElseIf(any_match, result));
+}
+
+// Ends the switch: |result| becomes the Else() branch of the accumulated
+// If/ElseIf chain.
+template <typename T>
+ResultExpr Caser<T>::Default(ResultExpr result) const {
+  const ResultExpr switch_expr = elser_.Else(result);
+  return switch_expr;
+}
+
+} // namespace bpf_dsl
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_BPF_DSL_BPF_DSL_H_
diff --git a/sandbox/linux/bpf_dsl/bpf_dsl_unittest.cc b/sandbox/linux/bpf_dsl/bpf_dsl_unittest.cc
new file mode 100644
index 0000000000..398ec59ef1
--- /dev/null
+++ b/sandbox/linux/bpf_dsl/bpf_dsl_unittest.cc
@@ -0,0 +1,486 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/bpf_dsl/bpf_dsl.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+
+#include <map>
+#include <utility>
+
+#include "base/files/scoped_file.h"
+#include "base/macros.h"
+#include "build/build_config.h"
+#include "sandbox/linux/bpf_dsl/bpf_dsl_impl.h"
+#include "sandbox/linux/bpf_dsl/codegen.h"
+#include "sandbox/linux/bpf_dsl/policy.h"
+#include "sandbox/linux/bpf_dsl/policy_compiler.h"
+#include "sandbox/linux/bpf_dsl/seccomp_macros.h"
+#include "sandbox/linux/bpf_dsl/trap_registry.h"
+#include "sandbox/linux/bpf_dsl/verifier.h"
+#include "sandbox/linux/seccomp-bpf/errorcode.h"
+#include "sandbox/linux/system_headers/linux_filter.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+#define CASES SANDBOX_BPF_DSL_CASES
+
+namespace sandbox {
+namespace bpf_dsl {
+namespace {
+
+// Builds a fake arch_seccomp_data for system call |nr| with up to six
+// arguments; arguments that are not supplied default to 0.
+struct arch_seccomp_data FakeSyscall(int nr,
+                                     uint64_t p0 = 0,
+                                     uint64_t p1 = 0,
+                                     uint64_t p2 = 0,
+                                     uint64_t p3 = 0,
+                                     uint64_t p4 = 0,
+                                     uint64_t p5 = 0) {
+  // Made up program counter for syscall address.
+  const uint64_t kFakePC = 0x543210;
+
+  // Initializers, in order: syscall number, architecture, instruction
+  // pointer, argument array.
+  struct arch_seccomp_data fake = {
+      nr,
+      SECCOMP_ARCH,
+      kFakePC,
+      {p0, p1, p2, p3, p4, p5},
+  };
+  return fake;
+}
+
+// TrapRegistry test double: hands out small sequential IDs per distinct
+// (handler, aux) pair and accepts only safe traps.
+class FakeTrapRegistry : public TrapRegistry {
+ public:
+  FakeTrapRegistry() : map_() {}
+  virtual ~FakeTrapRegistry() {}
+
+  // Returns the ID already assigned to (fnc, aux), or assigns the next ID
+  // (current map size + 1) when the pair is new. Records a test failure if
+  // |safe| is false.
+  uint16_t Add(TrapFnc fnc, const void* aux, bool safe) override {
+    EXPECT_TRUE(safe);
+
+    // insert() leaves an existing entry untouched, so |candidate_id| is only
+    // stored when the key was not present before.
+    const uint16_t candidate_id = static_cast<uint16_t>(map_.size() + 1);
+    const Key key(fnc, aux);
+    return map_.insert(std::make_pair(key, candidate_id)).first->second;
+  }
+
+  // Not supported by the fake: records a test failure and reports false.
+  bool EnableUnsafeTraps() override {
+    ADD_FAILURE() << "Unimplemented";
+    return false;
+  }
+
+ private:
+  using Key = std::pair<TrapFnc, const void*>;
+
+  std::map<Key, uint16_t> map_;
+
+  DISALLOW_COPY_AND_ASSIGN(FakeTrapRegistry);
+};
+
+// Two distinguishable trap handlers; both ignore their arguments and return
+// a fixed value.
+intptr_t FakeTrapFuncOne(const arch_seccomp_data& /* data */,
+                         void* /* aux */) {
+  return 1;
+}
+intptr_t FakeTrapFuncTwo(const arch_seccomp_data& /* data */,
+                         void* /* aux */) {
+  return 2;
+}
+
+// Checks that FakeTrapRegistry assigns sequential IDs to distinct
+// (handler, aux) pairs and returns the same ID when a pair is added again.
+TEST(FakeTrapRegistry, TrapIDs) {
+  // Four distinct keys: two handlers, each with a null and a non-null aux
+  // pointer (the array's own address serves as the non-null value).
+  struct {
+    TrapRegistry::TrapFnc fnc;
+    const void* aux;
+  } funcs[] = {
+      {FakeTrapFuncOne, nullptr},
+      {FakeTrapFuncTwo, nullptr},
+      {FakeTrapFuncOne, funcs},
+      {FakeTrapFuncTwo, funcs},
+  };
+
+  FakeTrapRegistry traps;
+
+  // Add traps twice to test that IDs are reused correctly.
+  for (int pass = 0; pass < 2; ++pass) {
+    for (size_t j = 0; j < arraysize(funcs); ++j) {
+      // Trap IDs start at 1.
+      EXPECT_EQ(j + 1, traps.Add(funcs[j].fnc, funcs[j].aux, true));
+    }
+  }
+}
+
+// Compiles a Policy once (with verification enabled) and evaluates the
+// resulting BPF program against fake system call data.
+class PolicyEmulator {
+ public:
+  // The program is assigned in the constructor body, after both members have
+  // been initialized, because compilation is given a pointer to |traps_|.
+  explicit PolicyEmulator(const Policy* policy) : program_(), traps_() {
+    program_ = *PolicyCompiler(policy, &traps_).Compile(true /* verify */);
+  }
+  ~PolicyEmulator() {}
+
+  // Runs the compiled program on |data| and returns its result. If the
+  // evaluator reports an error, records a test failure and returns 0.
+  uint32_t Emulate(const struct arch_seccomp_data& data) const {
+    const char* error_msg = nullptr;
+    const uint32_t verdict = Verifier::EvaluateBPF(program_, data, &error_msg);
+    if (!error_msg)
+      return verdict;
+
+    ADD_FAILURE() << error_msg;
+    return 0;
+  }
+
+  // Expects the program to allow the system call described by |data|.
+  void ExpectAllow(const struct arch_seccomp_data& data) const {
+    EXPECT_EQ(SECCOMP_RET_ALLOW, Emulate(data));
+  }
+
+  // Expects the program to fail the system call with errno |err|.
+  void ExpectErrno(uint16_t err, const struct arch_seccomp_data& data) const {
+    EXPECT_EQ(SECCOMP_RET_ERRNO | err, Emulate(data));
+  }
+
+ private:
+  CodeGen::Program program_;
+  FakeTrapRegistry traps_;
+
+  DISALLOW_COPY_AND_ASSIGN(PolicyEmulator);
+};
+
+// Policy exercising the == and != comparisons on a single argument.
+class BasicPolicy : public Policy {
+ public:
+  BasicPolicy() {}
+  ~BasicPolicy() override {}
+
+  // getpgid: EPERM when pid == 0, otherwise EINVAL.
+  // setuid: ESRCH when uid != 42, otherwise ENOMEM.
+  // Every other system call is allowed.
+  ResultExpr EvaluateSyscall(int sysno) const override {
+    switch (sysno) {
+      case __NR_getpgid: {
+        const Arg<pid_t> pid(0);
+        return If(pid == 0, Error(EPERM)).Else(Error(EINVAL));
+      }
+      case __NR_setuid: {
+        const Arg<uid_t> uid(0);
+        return If(uid != 42, Error(ESRCH)).Else(Error(ENOMEM));
+      }
+      default:
+        return Allow();
+    }
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(BasicPolicy);
+};
+
+// Checks both outcomes of each comparison in BasicPolicy.
+TEST(BPFDSL, Basic) {
+  BasicPolicy policy;
+  PolicyEmulator emu(&policy);
+
+  // getpgid: pid == 0 versus pid != 0.
+  emu.ExpectErrno(EPERM, FakeSyscall(__NR_getpgid, 0));
+  emu.ExpectErrno(EINVAL, FakeSyscall(__NR_getpgid, 1));
+
+  // setuid: uid == 42 versus uid != 42.
+  emu.ExpectErrno(ENOMEM, FakeSyscall(__NR_setuid, 42));
+  emu.ExpectErrno(ESRCH, FakeSyscall(__NR_setuid, 43));
+}
+
+/* On IA-32, socketpair() is implemented via socketcall(). :-( */
+#if !defined(ARCH_CPU_X86)
+// Policy combining && and || over three socketpair() arguments.
+class BooleanLogicPolicy : public Policy {
+ public:
+  BooleanLogicPolicy() {}
+  ~BooleanLogicPolicy() override {}
+
+  // socketpair: EPERM for an AF_UNIX stream or datagram socket with protocol
+  // 0, EINVAL for any other combination. Every other system call is allowed.
+  ResultExpr EvaluateSyscall(int sysno) const override {
+    if (sysno != __NR_socketpair)
+      return Allow();
+
+    const Arg<int> domain(0), type(1), protocol(2);
+    const BoolExpr is_unix = domain == AF_UNIX;
+    const BoolExpr is_stream_or_dgram =
+        type == SOCK_STREAM || type == SOCK_DGRAM;
+    const BoolExpr is_default_protocol = protocol == 0;
+    return If(is_unix && is_stream_or_dgram && is_default_protocol,
+              Error(EPERM)).Else(Error(EINVAL));
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(BooleanLogicPolicy);
+};
+
+// Checks BooleanLogicPolicy against matching and non-matching socketpair()
+// argument combinations.
+TEST(BPFDSL, BooleanLogic) {
+  BooleanLogicPolicy policy;
+  PolicyEmulator emu(&policy);
+
+  // Stand-in for the fourth socketpair() argument; the policy never tests it.
+  const intptr_t kFakeSV = 0x12345;
+
+  // Acceptable combinations that should return EPERM.
+  emu.ExpectErrno(
+      EPERM, FakeSyscall(__NR_socketpair, AF_UNIX, SOCK_STREAM, 0, kFakeSV));
+  emu.ExpectErrno(
+      EPERM, FakeSyscall(__NR_socketpair, AF_UNIX, SOCK_DGRAM, 0, kFakeSV));
+
+  // Combinations that are invalid for only one reason; should return EINVAL.
+  emu.ExpectErrno(
+      EINVAL, FakeSyscall(__NR_socketpair, AF_INET, SOCK_STREAM, 0, kFakeSV));
+  emu.ExpectErrno(
+      EINVAL,
+      FakeSyscall(__NR_socketpair, AF_UNIX, SOCK_SEQPACKET, 0, kFakeSV));
+  emu.ExpectErrno(
+      EINVAL, FakeSyscall(__NR_socketpair, AF_UNIX, SOCK_STREAM, IPPROTO_TCP,
+                          kFakeSV));
+
+  // Completely unacceptable combination; should also return EINVAL.
+  emu.ExpectErrno(
+      EINVAL, FakeSyscall(__NR_socketpair, AF_INET, SOCK_SEQPACKET, IPPROTO_UDP,
+                          kFakeSV));
+}
+#endif // !ARCH_CPU_X86
+
+// Policy exercised by the MoreBooleanLogic test: setresuid is answered with
+// EPERM if any of its three arguments is 0, EAGAIN if all three are 1, and
+// EINVAL otherwise; every other system call is allowed.
+class MoreBooleanLogicPolicy : public Policy {
+ public:
+  MoreBooleanLogicPolicy() {}
+  ~MoreBooleanLogicPolicy() override {}
+
+  ResultExpr EvaluateSyscall(int sysno) const override {
+    if (sysno != __NR_setresuid) {
+      return Allow();
+    }
+
+    const Arg<uid_t> ruid(0);
+    const Arg<uid_t> euid(1);
+    const Arg<uid_t> suid(2);
+    return If(ruid == 0 || euid == 0 || suid == 0, Error(EPERM))
+        .ElseIf(ruid == 1 && euid == 1 && suid == 1, Error(EAGAIN))
+        .Else(Error(EINVAL));
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MoreBooleanLogicPolicy);
+};
+
+TEST(BPFDSL, MoreBooleanLogic) {
+  MoreBooleanLogicPolicy setresuid_policy;
+  PolicyEmulator emu(&setresuid_policy);
+
+  // A zero in any one position is enough for EPERM.
+  emu.ExpectErrno(EPERM, FakeSyscall(__NR_setresuid, 0, 5, 5));
+  emu.ExpectErrno(EPERM, FakeSyscall(__NR_setresuid, 5, 0, 5));
+  emu.ExpectErrno(EPERM, FakeSyscall(__NR_setresuid, 5, 5, 0));
+
+  // EAGAIN requires a one in every position.
+  emu.ExpectErrno(EAGAIN, FakeSyscall(__NR_setresuid, 1, 1, 1));
+
+  // Any other combination falls through to EINVAL.
+  emu.ExpectErrno(EINVAL, FakeSyscall(__NR_setresuid, 5, 1, 1));
+  emu.ExpectErrno(EINVAL, FakeSyscall(__NR_setresuid, 1, 5, 1));
+  emu.ExpectErrno(EINVAL, FakeSyscall(__NR_setresuid, 1, 1, 5));
+  emu.ExpectErrno(EINVAL, FakeSyscall(__NR_setresuid, 3, 4, 5));
+}
+
+// Pointer-sized value that ArgSizePolicy compares argument 0 against. The
+// literal is 64 bits wide; the cast narrows it wherever uintptr_t is smaller.
+static const uintptr_t kDeadBeefAddr =
+ static_cast<uintptr_t>(0xdeadbeefdeadbeefULL);
+
+// Policy exercised by ArgSizeTest: uname is rejected with EPERM only when its
+// pointer-sized argument 0 equals kDeadBeefAddr; everything else is allowed.
+class ArgSizePolicy : public Policy {
+ public:
+  ArgSizePolicy() {}
+  ~ArgSizePolicy() override {}
+
+  ResultExpr EvaluateSyscall(int sysno) const override {
+    if (sysno != __NR_uname) {
+      return Allow();
+    }
+
+    const Arg<uintptr_t> buf_addr(0);
+    return If(buf_addr == kDeadBeefAddr, Error(EPERM)).Else(Allow());
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArgSizePolicy);
+};
+
+TEST(BPFDSL, ArgSizeTest) {
+  ArgSizePolicy uname_policy;
+  PolicyEmulator emu(&uname_policy);
+
+  // A null argument does not match the magic address and is allowed.
+  emu.ExpectAllow(FakeSyscall(__NR_uname, 0));
+  // The magic address itself must be rejected with EPERM.
+  emu.ExpectErrno(EPERM, FakeSyscall(__NR_uname, kDeadBeefAddr));
+}
+
+#if 0
+// TODO(mdempsky): This is really an integration test.
+
+// Policy for the TrapTest that follows (both are compiled out by the
+// enclosing "#if 0"): uname is routed to UnameTrap with &count_ as the aux
+// pointer; every other system call is allowed.
+class TrappingPolicy : public Policy {
+ public:
+ TrappingPolicy() {}
+ ~TrappingPolicy() override {}
+ ResultExpr EvaluateSyscall(int sysno) const override {
+ if (sysno == __NR_uname) {
+ return Trap(UnameTrap, &count_);
+ }
+ return Allow();
+ }
+
+ private:
+ // Counter incremented by UnameTrap each time it runs.
+ static intptr_t count_;
+
+ // Trap handler: checks that |aux| is the address of count_, then returns the
+ // pre-incremented counter.
+ static intptr_t UnameTrap(const struct arch_seccomp_data& data, void* aux) {
+ BPF_ASSERT_EQ(&count_, aux);
+ return ++count_;
+ }
+
+ DISALLOW_COPY_AND_ASSIGN(TrappingPolicy);
+};
+
+// Out-of-class definition of TrappingPolicy's static counter.
+intptr_t TrappingPolicy::count_;
+
+// Three consecutive uname calls are expected to produce 1, 2 and 3, matching
+// the values UnameTrap returns as it pre-increments count_.
+// NOTE(review): ASSERT_SYSCALL_RESULT is defined elsewhere; presumably it
+// compares the first argument against the system call's result - confirm.
+BPF_TEST_C(BPFDSL, TrapTest, TrappingPolicy) {
+ ASSERT_SYSCALL_RESULT(1, uname, NULL);
+ ASSERT_SYSCALL_RESULT(2, uname, NULL);
+ ASSERT_SYSCALL_RESULT(3, uname, NULL);
+}
+#endif
+
+// Policy exercised by MaskTest: setuid, setgid and setpgid each test a bit
+// mask of argument 0 and answer EINVAL on a match, EACCES otherwise. Every
+// other system call is allowed.
+class MaskingPolicy : public Policy {
+ public:
+  MaskingPolicy() {}
+  ~MaskingPolicy() override {}
+
+  ResultExpr EvaluateSyscall(int sysno) const override {
+    switch (sysno) {
+      case __NR_setuid: {
+        // Match when the low four bits are all clear.
+        const Arg<uid_t> id(0);
+        return If((id & 0xf) == 0, Error(EINVAL)).Else(Error(EACCES));
+      }
+      case __NR_setgid: {
+        // Match when bits 4-7 are all set.
+        const Arg<gid_t> id(0);
+        return If((id & 0xf0) == 0xf0, Error(EINVAL)).Else(Error(EACCES));
+      }
+      case __NR_setpgid: {
+        // Match when, under mask 0xa5, exactly the 0xa0 bits are set.
+        const Arg<pid_t> id(0);
+        return If((id & 0xa5) == 0xa0, Error(EINVAL)).Else(Error(EACCES));
+      }
+      default:
+        return Allow();
+    }
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MaskingPolicy);
+};
+
+TEST(BPFDSL, MaskTest) {
+  MaskingPolicy masking_policy;
+  PolicyEmulator emu(&masking_policy);
+
+  // Sweep the low byte of each argument and recompute the expected errno
+  // with the same mask expression the policy uses.
+  for (uid_t u = 0; u < 0x100; ++u) {
+    int want = EACCES;
+    if ((u & 0xf) == 0) {
+      want = EINVAL;
+    }
+    emu.ExpectErrno(want, FakeSyscall(__NR_setuid, u));
+  }
+
+  for (gid_t g = 0; g < 0x100; ++g) {
+    int want = EACCES;
+    if ((g & 0xf0) == 0xf0) {
+      want = EINVAL;
+    }
+    emu.ExpectErrno(want, FakeSyscall(__NR_setgid, g));
+  }
+
+  for (pid_t p = 0; p < 0x100; ++p) {
+    int want = EACCES;
+    if ((p & 0xa5) == 0xa0) {
+      want = EINVAL;
+    }
+    emu.ExpectErrno(want, FakeSyscall(__NR_setpgid, p, 0));
+  }
+}
+
+// Policy exercised by ElseIfTest: setuid walks an If/ElseIf/Else chain of
+// progressively narrower masks over argument 0; the first mask that leaves
+// zero picks the errno. Every other system call is allowed.
+class ElseIfPolicy : public Policy {
+ public:
+  ElseIfPolicy() {}
+  ~ElseIfPolicy() override {}
+
+  ResultExpr EvaluateSyscall(int sysno) const override {
+    if (sysno != __NR_setuid) {
+      return Allow();
+    }
+
+    const Arg<uid_t> id(0);
+    return If((id & 0xfff) == 0, Error(0))
+        .ElseIf((id & 0xff0) == 0, Error(EINVAL))
+        .ElseIf((id & 0xf00) == 0, Error(EEXIST))
+        .Else(Error(EACCES));
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ElseIfPolicy);
+};
+
+TEST(BPFDSL, ElseIfTest) {
+  ElseIfPolicy chain_policy;
+  PolicyEmulator emu(&chain_policy);
+
+  // All twelve low bits clear: first arm, errno 0.
+  emu.ExpectErrno(0, FakeSyscall(__NR_setuid, 0));
+
+  // Only the lowest nibble set: second arm.
+  emu.ExpectErrno(EINVAL, FakeSyscall(__NR_setuid, 0x0001));
+  emu.ExpectErrno(EINVAL, FakeSyscall(__NR_setuid, 0x0002));
+
+  // Second nibble set as well: third arm.
+  emu.ExpectErrno(EEXIST, FakeSyscall(__NR_setuid, 0x0011));
+  emu.ExpectErrno(EEXIST, FakeSyscall(__NR_setuid, 0x0022));
+
+  // Third nibble set: nothing matches, so the final Else applies.
+  emu.ExpectErrno(EACCES, FakeSyscall(__NR_setuid, 0x0111));
+  emu.ExpectErrno(EACCES, FakeSyscall(__NR_setuid, 0x0222));
+}
+
+// Policy exercised by SwitchTest: fcntl is dispatched on its command
+// (argument 1) through Switch/Case/Default, with one case additionally
+// inspecting argument 2. Every other system call is allowed.
+class SwitchPolicy : public Policy {
+ public:
+  SwitchPolicy() {}
+  ~SwitchPolicy() override {}
+
+  ResultExpr EvaluateSyscall(int sysno) const override {
+    if (sysno != __NR_fcntl) {
+      return Allow();
+    }
+
+    const Arg<int> cmd(1);
+    const Arg<unsigned long> cmd_arg(2);
+    return Switch(cmd)
+        .CASES((F_GETFL, F_GETFD), Error(ENOENT))
+        .Case(F_SETFD, If(cmd_arg == O_CLOEXEC, Allow()).Else(Error(EINVAL)))
+        .Case(F_SETFL, Error(EPERM))
+        .Default(Error(EACCES));
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(SwitchPolicy);
+};
+
+TEST(BPFDSL, SwitchTest) {
+  SwitchPolicy fcntl_policy;
+  PolicyEmulator emu(&fcntl_policy);
+
+  // Arbitrary descriptor number; the policy never looks at argument 0.
+  const int kDummyFD = 42;
+
+  // Both "get" commands share the ENOENT case.
+  emu.ExpectErrno(ENOENT, FakeSyscall(__NR_fcntl, kDummyFD, F_GETFD));
+  emu.ExpectErrno(ENOENT, FakeSyscall(__NR_fcntl, kDummyFD, F_GETFL));
+
+  // F_SETFD is allowed only with O_CLOEXEC as its argument.
+  emu.ExpectAllow(FakeSyscall(__NR_fcntl, kDummyFD, F_SETFD, O_CLOEXEC));
+  emu.ExpectErrno(EINVAL, FakeSyscall(__NR_fcntl, kDummyFD, F_SETFD, 0));
+
+  // F_SETFL is always EPERM.
+  emu.ExpectErrno(EPERM,
+                  FakeSyscall(__NR_fcntl, kDummyFD, F_SETFL, O_RDONLY));
+
+  // Any command without a case of its own hits Default.
+  emu.ExpectErrno(EACCES, FakeSyscall(__NR_fcntl, kDummyFD, F_DUPFD, 0));
+}
+
+// Trap handler stub: ignores both arguments and returns 0. The tests that
+// follow hand it to Trap() and UnsafeTrap() only to build result expressions.
+static intptr_t DummyTrap(const struct arch_seccomp_data& data, void* aux) {
+ return 0;
+}
+
+TEST(BPFDSL, IsAllowDeny) {
+  // Allow() is an allow and not a deny.
+  ResultExpr allow_expr = Allow();
+  EXPECT_TRUE(allow_expr->IsAllow());
+  EXPECT_FALSE(allow_expr->IsDeny());
+
+  // Error() is a deny.
+  ResultExpr error_expr = Error(ENOENT);
+  EXPECT_FALSE(error_expr->IsAllow());
+  EXPECT_TRUE(error_expr->IsDeny());
+
+  // Trace() is neither.
+  ResultExpr trace_expr = Trace(42);
+  EXPECT_FALSE(trace_expr->IsAllow());
+  EXPECT_FALSE(trace_expr->IsDeny());
+
+  // Trap() is a deny.
+  ResultExpr trap_expr = Trap(DummyTrap, nullptr);
+  EXPECT_FALSE(trap_expr->IsAllow());
+  EXPECT_TRUE(trap_expr->IsDeny());
+
+  // A conditional mixing allow and deny is classified as neither.
+  const Arg<int> first_arg(0);
+  ResultExpr conditional = If(first_arg == 0, Allow()).Else(Error(EPERM));
+  EXPECT_FALSE(conditional->IsAllow());
+  EXPECT_FALSE(conditional->IsDeny());
+}
+
+TEST(BPFDSL, HasUnsafeTraps) {
+  ResultExpr allow_expr = Allow();
+  EXPECT_FALSE(allow_expr->HasUnsafeTraps());
+
+  // An ordinary Trap() does not count as unsafe.
+  ResultExpr safe_trap = Trap(DummyTrap, nullptr);
+  EXPECT_FALSE(safe_trap->HasUnsafeTraps());
+
+  ResultExpr unsafe_trap = UnsafeTrap(DummyTrap, nullptr);
+  EXPECT_TRUE(unsafe_trap->HasUnsafeTraps());
+
+  // One unsafe branch is enough to mark the whole conditional.
+  const Arg<int> first_arg(0);
+  ResultExpr conditional = If(first_arg == 0, allow_expr).Else(unsafe_trap);
+  EXPECT_TRUE(conditional->HasUnsafeTraps());
+}
+
+} // namespace
+} // namespace bpf_dsl
+} // namespace sandbox
diff --git a/sandbox/linux/bpf_dsl/codegen.cc b/sandbox/linux/bpf_dsl/codegen.cc
new file mode 100644
index 0000000000..2d5c8e406e
--- /dev/null
+++ b/sandbox/linux/bpf_dsl/codegen.cc
@@ -0,0 +1,159 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/bpf_dsl/codegen.h"
+
+#include <limits>
+#include <utility>
+
+#include "base/logging.h"
+#include "sandbox/linux/system_headers/linux_filter.h"
+
+// This CodeGen implementation strives for simplicity while still
+// generating acceptable BPF programs under typical usage patterns
+// (e.g., by PolicyCompiler).
+//
+// The key to its simplicity is that BPF programs only support forward
+// jumps/branches, which allows constraining the DAG construction API
+// to make instruction nodes immutable. Immutable nodes admit a
+// simple greedy approach of emitting new instructions as needed and
+// then reusing existing ones that have already been emitted. This
+// cleanly avoids any need to compute basic blocks or apply
+// topological sorting because the API effectively sorts instructions
+// for us (e.g., before MakeInstruction() can be called to emit a
+// branch instruction, it must have already been called for each
+// branch path).
+//
+// This greedy algorithm is not without (theoretical) weakness though:
+//
+// 1. In the general case, we don't eliminate dead code. If needed,
+// we could trace back through the program in Compile() and elide
+// any unneeded instructions, but in practice we only emit live
+// instructions anyway.
+//
+// 2. By not dividing instructions into basic blocks and sorting, we
+// lose an opportunity to move non-branch/non-return instructions
+// adjacent to their successor instructions, which means we might
+// need to emit additional jumps. But in practice, they'll
+// already be nearby as long as callers don't go out of their way
+// to interleave MakeInstruction() calls for unrelated code
+// sequences.
+
+namespace sandbox {
+
+// kBranchRange is the maximum value that can be stored in
+// sock_filter's 8-bit jt and jf fields.
+const size_t kBranchRange = std::numeric_limits<uint8_t>::max();
+
+// Out-of-class definition for the static constant that codegen.h declares
+// and initializes inside class CodeGen.
+const CodeGen::Node CodeGen::kNullNode;
+
+// Starts with an empty program, no equivalence entries and no memos; the
+// containers' own default constructors provide that state.
+CodeGen::CodeGen() {}
+
+// Nothing to release beyond what the member containers clean up themselves.
+CodeGen::~CodeGen() {}
+
+// Copies the instructions from |head| onward into |out| in execution order.
+// program_ is kept reversed, so walking it with reverse iterators and
+// skipping the Offset(head) instructions appended after |head| yields the
+// program that starts at |head|.
+void CodeGen::Compile(CodeGen::Node head, Program* out) {
+  DCHECK(out);
+  const auto first_insn = program_.rbegin() + Offset(head);
+  out->assign(first_insn, program_.rend());
+}
+
+// Returns the node for instruction (code, k, jt, jf). Each distinct tuple is
+// handed to AppendInstruction() once; later requests for the same tuple are
+// served from memos_ so that no redundant code is generated.
+CodeGen::Node CodeGen::MakeInstruction(uint16_t code,
+                                       uint32_t k,
+                                       Node jt,
+                                       Node jf) {
+  // Reserve the memo slot first: insert() reports whether the key is new.
+  const auto inserted =
+      memos_.insert(std::make_pair(MemoKey(code, k, jt, jf), kNullNode));
+  CodeGen::Node& memoized = inserted.first->second;
+  if (!inserted.second) {
+    return memoized;  // Already emitted earlier.
+  }
+
+  memoized = AppendInstruction(code, k, jt, jf);
+  return memoized;
+}
+
+// Validates the operands for |code|'s instruction class and emits the
+// instruction through Append().
+//  - Conditional branches: both targets are first brought within branch
+//    range via WithinRange(), which may itself emit jump instructions.
+//  - Returns: neither |jt| nor |jf| may be supplied.
+//  - Everything else: |jf| must be null and |jt| must end up being the most
+//    recently emitted instruction, so that execution falls through to it.
+// Explicit BPF_JA instructions are rejected; CodeGen inserts them itself.
+CodeGen::Node CodeGen::AppendInstruction(uint16_t code,
+ uint32_t k,
+ Node jt,
+ Node jf) {
+ if (BPF_CLASS(code) == BPF_JMP) {
+ CHECK_NE(BPF_JA, BPF_OP(code)) << "CodeGen inserts JAs as needed";
+
+ // Optimally adding jumps is rather tricky, so we use a quick
+ // approximation: by artificially reducing |jt|'s range, |jt| will
+ // stay within its true range even if we add a jump for |jf|.
+ jt = WithinRange(jt, kBranchRange - 1);
+ jf = WithinRange(jf, kBranchRange);
+ // Offsets are computed only after both WithinRange() calls, since either
+ // call may have appended a jump and shifted them.
+ return Append(code, k, Offset(jt), Offset(jf));
+ }
+
+ CHECK_EQ(kNullNode, jf) << "Non-branch instructions shouldn't provide jf";
+ if (BPF_CLASS(code) == BPF_RET) {
+ CHECK_EQ(kNullNode, jt) << "Return instructions shouldn't provide jt";
+ } else {
+ // For non-branch/non-return instructions, execution always
+ // proceeds to the next instruction; so we need to arrange for
+ // that to be |jt|.
+ jt = WithinRange(jt, 0);
+ CHECK_EQ(0U, Offset(jt)) << "ICE: Failed to setup next instruction";
+ }
+ // Non-branch instructions carry no jump offsets.
+ return Append(code, k, 0, 0);
+}
+
+// Returns a node that behaves like |target| and whose Offset() is at most
+// |range|, emitting an unconditional jump when no such node exists yet.
+CodeGen::Node CodeGen::WithinRange(Node target, size_t range) {
+  // Best case: |target| is already close enough.
+  if (Offset(target) <= range) {
+    return target;
+  }
+
+  // Otherwise try the stand-in most recently recorded for |target|.
+  const Node stand_in = equivalent_.at(target);
+  if (Offset(stand_in) <= range) {
+    return stand_in;
+  }
+
+  // Last resort: emit a jump to |target| and record it as the new stand-in
+  // so later callers can reuse it.
+  const Node jump = Append(BPF_JMP | BPF_JA, Offset(target), 0, 0);
+  equivalent_.at(target) = jump;
+  return jump;
+}
+
+// Pushes one raw instruction onto program_ (and a matching self-entry onto
+// equivalent_) and returns its node, which is its index in program_.
+// |jt| and |jf| are relative offsets here, not nodes.
+CodeGen::Node CodeGen::Append(uint16_t code, uint32_t k, size_t jt, size_t jf) {
+  // Only conditional branches may carry jump offsets, and those must fit in
+  // sock_filter's 8-bit fields.
+  const bool is_conditional_branch =
+      BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA;
+  if (is_conditional_branch) {
+    CHECK_LE(jt, kBranchRange);
+    CHECK_LE(jf, kBranchRange);
+  } else {
+    CHECK_EQ(0U, jt);
+    CHECK_EQ(0U, jf);
+  }
+
+  CHECK_LT(program_.size(), static_cast<size_t>(BPF_MAXINSNS));
+  CHECK_EQ(program_.size(), equivalent_.size());
+
+  const Node new_node = program_.size();
+  const sock_filter insn = {code, static_cast<uint8_t>(jt),
+                            static_cast<uint8_t>(jf), k};
+  program_.push_back(insn);
+  // A fresh instruction is initially its own stand-in.
+  equivalent_.push_back(new_node);
+  return new_node;
+}
+
+// Number of instructions that were appended to program_ after |target|;
+// zero means |target| is the most recently appended instruction.
+size_t CodeGen::Offset(Node target) const {
+  CHECK_LT(target, program_.size()) << "Bogus offset target node";
+  const size_t newest = program_.size() - 1;
+  return newest - target;
+}
+
+// TODO(mdempsky): Move into a general base::Tuple helper library.
+//
+// Lexicographic "less than" over the four MemoKey fields, in field order.
+bool CodeGen::MemoKeyLess::operator()(const MemoKey& lhs,
+                                      const MemoKey& rhs) const {
+  // The first field that differs decides the ordering.
+  if (base::get<0>(lhs) != base::get<0>(rhs)) {
+    return base::get<0>(lhs) < base::get<0>(rhs);
+  }
+  if (base::get<1>(lhs) != base::get<1>(rhs)) {
+    return base::get<1>(lhs) < base::get<1>(rhs);
+  }
+  if (base::get<2>(lhs) != base::get<2>(rhs)) {
+    return base::get<2>(lhs) < base::get<2>(rhs);
+  }
+  // Last field: equal keys compare as not-less.
+  return base::get<3>(lhs) < base::get<3>(rhs);
+}
+
+} // namespace sandbox
diff --git a/sandbox/linux/bpf_dsl/codegen.h b/sandbox/linux/bpf_dsl/codegen.h
new file mode 100644
index 0000000000..9d898030b9
--- /dev/null
+++ b/sandbox/linux/bpf_dsl/codegen.h
@@ -0,0 +1,123 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_BPF_DSL_CODEGEN_H__
+#define SANDBOX_LINUX_BPF_DSL_CODEGEN_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <map>
+#include <vector>
+
+#include "base/macros.h"
+#include "base/tuple.h"
+#include "sandbox/sandbox_export.h"
+
+struct sock_filter;
+
+namespace sandbox {
+
+// The code generator implements a basic assembler that can convert a
+// graph of BPF instructions into a well-formed array of BPF
+// instructions. Most notably, it ensures that jumps are always
+// forward and don't exceed the limit of 255 instructions imposed by
+// the instruction set.
+//
+// Callers would typically create a new CodeGen object and then use it
+// to build a DAG of instruction nodes. They'll eventually call
+// Compile() to convert this DAG to a Program.
+//
+//   CodeGen gen;
+//   CodeGen::Node allow, branch, dag;
+//
+//   allow =
+//     gen.MakeInstruction(BPF_RET+BPF_K,
+//                         ErrorCode(ErrorCode::ERR_ALLOWED).err());
+//   branch =
+//     gen.MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, __NR_getpid,
+//                         Trap(GetPidHandler, NULL), allow);
+//   dag =
+//     gen.MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
+//                         offsetof(struct arch_seccomp_data, nr), branch);
+//
+//   // Simplified code follows; in practice, it is important to avoid calling
+//   // any C++ destructors after starting the sandbox.
+//   CodeGen::Program program;
+//   gen.Compile(dag, &program);
+//   const struct sock_fprog prog = {
+//     static_cast<unsigned short>(program.size()), &program[0] };
+//   prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+//
+class SANDBOX_EXPORT CodeGen {
+ public:
+ // A vector of BPF instructions that need to be installed as a filter
+ // program in the kernel.
+ typedef std::vector<struct sock_filter> Program;
+
+ // Node represents a node within the instruction DAG being compiled.
+ using Node = Program::size_type;
+
+ // kNullNode represents the "null" node; i.e., the reserved node
+ // value guaranteed to not equal any actual nodes.
+ static const Node kNullNode = -1;
+
+ CodeGen();
+ ~CodeGen();
+
+ // MakeInstruction creates a node representing the specified
+ // instruction, or returns an existing equivalent node if one
+ // exists. For details on the possible parameters refer to
+ // https://www.kernel.org/doc/Documentation/networking/filter.txt.
+ // TODO(mdempsky): Reconsider using default arguments here.
+ Node MakeInstruction(uint16_t code,
+ uint32_t k,
+ Node jt = kNullNode,
+ Node jf = kNullNode);
+
+ // Compile linearizes the instruction DAG rooted at |head| into a
+ // program that can be executed by a BPF virtual machine.
+ void Compile(Node head, Program* program);
+
+ private:
+ // MemoKey is the (code, k, jt, jf) tuple identifying an instruction;
+ // MemoKeyLess orders such keys so they can be used in a std::map.
+ using MemoKey = base::Tuple<uint16_t, uint32_t, Node, Node>;
+ struct MemoKeyLess {
+ bool operator()(const MemoKey& lhs, const MemoKey& rhs) const;
+ };
+
+ // AppendInstruction adds a new instruction, ensuring that |jt| and
+ // |jf| are within range as necessary for |code|.
+ Node AppendInstruction(uint16_t code, uint32_t k, Node jt, Node jf);
+
+ // WithinRange returns a node equivalent to |next| that is at most
+ // |range| instructions away from the (logical) beginning of the
+ // program.
+ Node WithinRange(Node next, size_t range);
+
+ // Append appends a new instruction to the physical end (i.e.,
+ // logical beginning) of |program_|.
+ Node Append(uint16_t code, uint32_t k, size_t jt, size_t jf);
+
+ // Offset returns how many instructions exist in |program_| after |target|.
+ size_t Offset(Node target) const;
+
+ // NOTE: program_ is the compiled program in *reverse*, so that
+ // indices remain stable as we add instructions.
+ Program program_;
+
+ // equivalent_ stores the most recent semantically-equivalent node for each
+ // instruction in program_. A node is defined as semantically-equivalent to N
+ // if it has the same instruction code and constant as N and its successor
+ // nodes (if any) are semantically-equivalent to N's successor nodes, or
+ // if it's an unconditional jump to a node semantically-equivalent to N.
+ std::vector<Node> equivalent_;
+
+ // memos_ maps each instruction tuple already passed to MakeInstruction()
+ // to the node that was created for it.
+ std::map<MemoKey, Node, MemoKeyLess> memos_;
+
+ DISALLOW_COPY_AND_ASSIGN(CodeGen);
+};
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_BPF_DSL_CODEGEN_H__
diff --git a/sandbox/linux/bpf_dsl/codegen_unittest.cc b/sandbox/linux/bpf_dsl/codegen_unittest.cc
new file mode 100644
index 0000000000..5961822123
--- /dev/null
+++ b/sandbox/linux/bpf_dsl/codegen_unittest.cc
@@ -0,0 +1,402 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/bpf_dsl/codegen.h"
+
+#include <map>
+#include <utility>
+#include <vector>
+
+#include "base/macros.h"
+#include "base/md5.h"
+#include "base/strings/string_piece.h"
+#include "sandbox/linux/system_headers/linux_filter.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace sandbox {
+namespace {
+
+// Hash provides an abstraction for building "hash trees" from BPF
+// control flow graphs, and efficiently identifying equivalent graphs.
+//
+// For simplicity, we use MD5, because base happens to provide a
+// convenient API for its use. However, any collision-resistant hash
+// should suffice.
+class Hash {
+ public:
+  // Default-constructed hash; also the default for absent successors.
+  static const Hash kZero;
+
+  Hash() : digest_() {}
+
+  // Hash of an instruction: MD5 over |code|, |k| and the raw bytes of the
+  // two successor hashes, in that order.
+  Hash(uint16_t code,
+       uint32_t k,
+       const Hash& jt = kZero,
+       const Hash& jf = kZero)
+      : digest_() {
+    base::MD5Context md5;
+    base::MD5Init(&md5);
+    HashValue(&md5, code);
+    HashValue(&md5, k);
+    HashValue(&md5, jt);
+    HashValue(&md5, jf);
+    base::MD5Final(&digest_, &md5);
+  }
+
+  Hash(const Hash& hash) = default;
+  Hash& operator=(const Hash& rhs) = default;
+
+  // Two hashes are equal when their hexadecimal digests are equal.
+  friend bool operator==(const Hash& lhs, const Hash& rhs) {
+    return lhs.Base16() == rhs.Base16();
+  }
+  friend bool operator!=(const Hash& lhs, const Hash& rhs) {
+    return lhs.Base16() != rhs.Base16();
+  }
+
+ private:
+  // Feeds the raw bytes of |value| into |ctx|.
+  template <typename T>
+  void HashValue(base::MD5Context* ctx, const T& value) {
+    const char* const raw_bytes = reinterpret_cast<const char*>(&value);
+    base::MD5Update(ctx, base::StringPiece(raw_bytes, sizeof(value)));
+  }
+
+  // Hexadecimal rendering of the digest, used for comparisons.
+  std::string Base16() const { return base::MD5DigestToBase16(digest_); }
+
+  base::MD5Digest digest_;
+};
+
+// Definition of the default-constructed sentinel declared inside Hash.
+const Hash Hash::kZero;
+
+// Sanity check that equality and inequality work on Hash as required.
+TEST(CodeGen, HashSanity) {
+  std::vector<Hash> distinct;
+
+  // Collect hashes that are all logically different from one another: the
+  // zero hash, then every combination of leaf, one-successor and
+  // two-successor instructions built from single-bit field values.
+  distinct.push_back(Hash::kZero);
+  for (int bits = 0; bits < 4; ++bits) {
+    distinct.push_back(Hash(bits & 1, bits & 2));
+  }
+  for (int bits = 0; bits < 16; ++bits) {
+    distinct.push_back(Hash(bits & 1, bits & 2, Hash(bits & 4, bits & 8)));
+  }
+  for (int bits = 0; bits < 64; ++bits) {
+    distinct.push_back(Hash(bits & 1, bits & 2, Hash(bits & 4, bits & 8),
+                            Hash(bits & 16, bits & 32)));
+  }
+
+  // Each entry must equal itself and differ from every other entry.
+  for (size_t i = 0; i < distinct.size(); ++i) {
+    for (size_t j = 0; j < distinct.size(); ++j) {
+      if (i == j) {
+        EXPECT_EQ(distinct[i], distinct[j]);
+      } else {
+        EXPECT_NE(distinct[i], distinct[j]);
+      }
+    }
+  }
+}
+
+// ProgramTest provides a fixture for writing and compiling sample
+// programs with CodeGen and verifying the linearized output matches
+// the input DAG.
+class ProgramTest : public ::testing::Test {
+ protected:
+ ProgramTest() : gen_(), node_hashes_() {}
+
+ // MakeInstruction calls CodeGen::MakeInstruction() and associates
+ // the returned node with a hash of the instruction.
+ CodeGen::Node MakeInstruction(uint16_t code,
+ uint32_t k,
+ CodeGen::Node jt = CodeGen::kNullNode,
+ CodeGen::Node jf = CodeGen::kNullNode) {
+ CodeGen::Node res = gen_.MakeInstruction(code, k, jt, jf);
+ EXPECT_NE(CodeGen::kNullNode, res);
+
+ // Record the expected hash for |res|. If CodeGen returned a node seen
+ // before, insert() leaves the stored hash in place and the check below
+ // verifies that it matches the hash of this instruction.
+ Hash digest(code, k, Lookup(jt), Lookup(jf));
+ auto it = node_hashes_.insert(std::make_pair(res, digest));
+ EXPECT_EQ(digest, it.first->second);
+
+ return res;
+ }
+
+ // RunTest compiles the program and verifies that the output matches
+ // what is expected. It should be called at the end of each program
+ // test case.
+ void RunTest(CodeGen::Node head) {
+ // Compile the program
+ CodeGen::Program program;
+ gen_.Compile(head, &program);
+
+ // Walk the program backwards, and compute the hash for each instruction.
+ // |i| is one past the index of the instruction being processed, so
+ // "i + offset" is the index of the instruction that lies |offset| past the
+ // next one, and plain |i| is the fall-through successor.
+ std::vector<Hash> prog_hashes(program.size());
+ for (size_t i = program.size(); i > 0; --i) {
+ const sock_filter& insn = program.at(i - 1);
+ Hash& hash = prog_hashes.at(i - 1);
+
+ if (BPF_CLASS(insn.code) == BPF_JMP) {
+ if (BPF_OP(insn.code) == BPF_JA) {
+ // The compiler adds JA instructions as needed, so skip them.
+ hash = prog_hashes.at(i + insn.k);
+ } else {
+ hash = Hash(insn.code, insn.k, prog_hashes.at(i + insn.jt),
+ prog_hashes.at(i + insn.jf));
+ }
+ } else if (BPF_CLASS(insn.code) == BPF_RET) {
+ hash = Hash(insn.code, insn.k);
+ } else {
+ hash = Hash(insn.code, insn.k, prog_hashes.at(i));
+ }
+ }
+
+ // The hash of the first compiled instruction must match the hash recorded
+ // for |head| when the DAG was built.
+ EXPECT_EQ(Lookup(head), prog_hashes.at(0));
+ }
+
+ private:
+ // Returns the hash recorded for |next|: Hash::kZero for the null node, and
+ // a test failure plus Hash::kZero for a node that was never recorded.
+ const Hash& Lookup(CodeGen::Node next) const {
+ if (next == CodeGen::kNullNode) {
+ return Hash::kZero;
+ }
+ auto it = node_hashes_.find(next);
+ if (it == node_hashes_.end()) {
+ ADD_FAILURE() << "No hash found for node " << next;
+ return Hash::kZero;
+ }
+ return it->second;
+ }
+
+ CodeGen gen_;
+ // Expected hash for every node returned by MakeInstruction().
+ std::map<CodeGen::Node, Hash> node_hashes_;
+
+ DISALLOW_COPY_AND_ASSIGN(ProgramTest);
+};
+
+TEST_F(ProgramTest, OneInstruction) {
+  // The smallest valid BPF program is a lone return:
+  //    RET 0
+  RunTest(MakeInstruction(BPF_RET + BPF_K, 0));
+}
+
+TEST_F(ProgramTest, SimpleBranch) {
+ // Create a program with a single branch:
+ // JUMP if eq 42 then $0 else $1
+ // 0: RET 1
+ // 1: RET 0
+ // The two return nodes are created inline as arguments, so C++ leaves the
+ // order in which they are emitted unspecified.
+ CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42,
+ MakeInstruction(BPF_RET + BPF_K, 1),
+ MakeInstruction(BPF_RET + BPF_K, 0));
+ RunTest(head);
+}
+
+TEST_F(ProgramTest, AtypicalBranch) {
+  // A conditional branch whose two arms are the very same node:
+  //    JUMP if eq 42 then $0 else $0
+  // 0: RET 0
+  const CodeGen::Node shared_ret = MakeInstruction(BPF_RET + BPF_K, 0);
+  const CodeGen::Node branch =
+      MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, shared_ret, shared_ret);
+
+  // N.B.: Both arms are already one and the same node, so there is nothing
+  // left to merge here; the program must simply compile and round-trip.
+  RunTest(branch);
+}
+
+TEST_F(ProgramTest, Complex) {
+  // Builds a small program with shared tails and a diamond:
+  //    JUMP if eq 42 then $0 else $1     (outer_branch)
+  // 0: LD 23                             (load23)
+  // 1: JUMP if eq 42 then $2 else $4     (inner_branch)
+  // 2: JUMP to $3                        (implicit_jump)
+  // 3: LD 42                             (load42)
+  //    RET 42                            (ret42)
+  // 4: LD 42                             (load42_again)
+  //    RET 42
+  const CodeGen::Node ret42 = MakeInstruction(BPF_RET + BPF_K, 42);
+  const CodeGen::Node load42 =
+      MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, ret42);
+  const CodeGen::Node implicit_jump = load42;  // Implicit JUMP
+
+  // Deliberately rebuild the same LD/RET pair; CodeGen must hand back the
+  // node it already created instead of emitting a duplicate.
+  const CodeGen::Node load42_again = MakeInstruction(
+      BPF_LD + BPF_W + BPF_ABS, 42, MakeInstruction(BPF_RET + BPF_K, 42));
+  EXPECT_EQ(implicit_jump, load42_again);
+
+  const CodeGen::Node inner_branch = MakeInstruction(
+      BPF_JMP + BPF_JEQ + BPF_K, 42, implicit_jump, load42_again);
+  const CodeGen::Node load23 =
+      MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 23, inner_branch);
+
+  // |load23| is a lone instruction that ends in neither a jump nor a return
+  // and simply falls through to |inner_branch|. Branching to both |load23|
+  // and |inner_branch| makes the graph diamond-shaped: |inner_branch| is
+  // reachable along two different paths.
+  const CodeGen::Node outer_branch =
+      MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, load23, inner_branch);
+
+  RunTest(outer_branch);
+}
+
+TEST_F(ProgramTest, ConfusingTails) {
+  // Regression test for https://crbug.com/351103/
+  // Instructions 2 and 4 are both "LOAD 0", yet they fall through to
+  // different, non-equivalent successors, so they must not be merged.
+  // Merging them would make the compiled program's graph differ from the one
+  // built here, which RunTest() detects.
+  //
+  //  0) LOAD 1  // ???
+  //  1) if A == 0x1; then JMP 2 else JMP 3
+  //  2) LOAD 0  // System call number
+  //  3) if A == 0x2; then JMP 4 else JMP 5
+  //  4) LOAD 0  // System call number
+  //  5) if A == 0x1; then JMP 6 else JMP 7
+  //  6) RET 0
+  //  7) RET 1
+
+  // Built bottom-up, since every successor must exist before its user.
+  const CodeGen::Node ret_one = MakeInstruction(BPF_RET + BPF_K, 1);    // 7
+  const CodeGen::Node ret_zero = MakeInstruction(BPF_RET + BPF_K, 0);   // 6
+  const CodeGen::Node last_branch =                                     // 5
+      MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, ret_zero, ret_one);
+  const CodeGen::Node second_load =                                     // 4
+      MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, last_branch);
+  const CodeGen::Node middle_branch =                                   // 3
+      MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, second_load, last_branch);
+  const CodeGen::Node first_load =                                      // 2
+      MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, middle_branch);
+  const CodeGen::Node first_branch =                                    // 1
+      MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, first_load, middle_branch);
+  const CodeGen::Node entry =                                           // 0
+      MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, first_branch);
+
+  RunTest(entry);
+}
+
+TEST_F(ProgramTest, ConfusingTailsBasic) {
+  // Reduced variant of the ConfusingTails test for https://crbug.com/351103/.
+  // Per the original report, wrongly merging the two "LOAD 0" instructions
+  // here would produce a cyclic graph and crash.
+  //
+  //  0) LOAD 1  // ???
+  //  1) if A == 0x1; then JMP 2 else JMP 3
+  //  2) LOAD 0  // System call number
+  //  3) if A == 0x2; then JMP 4 else JMP 5
+  //  4) LOAD 0  // System call number
+  //  5) RET 1
+
+  // Built bottom-up, since every successor must exist before its user.
+  const CodeGen::Node ret_one = MakeInstruction(BPF_RET + BPF_K, 1);    // 5
+  const CodeGen::Node second_load =                                     // 4
+      MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, ret_one);
+  const CodeGen::Node middle_branch =                                   // 3
+      MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, second_load, ret_one);
+  const CodeGen::Node first_load =                                      // 2
+      MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, middle_branch);
+  const CodeGen::Node first_branch =                                    // 1
+      MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, first_load, middle_branch);
+  const CodeGen::Node entry =                                           // 0
+      MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, first_branch);
+
+  RunTest(entry);
+}
+
+TEST_F(ProgramTest, ConfusingTailsMergeable) {
+  // Same shape as the ConfusingTails test, except that instructions 2 and 4
+  // are now identical RET instructions. A return has no successor, so the
+  // two are genuinely interchangeable and may be merged.
+  //
+  //  0) LOAD 1  // ???
+  //  1) if A == 0x1; then JMP 2 else JMP 3
+  //  2) RET 42
+  //  3) if A == 0x2; then JMP 4 else JMP 5
+  //  4) RET 42
+  //  5) if A == 0x1; then JMP 6 else JMP 7
+  //  6) RET 0
+  //  7) RET 1
+
+  // Built bottom-up, since every successor must exist before its user.
+  const CodeGen::Node ret_one = MakeInstruction(BPF_RET + BPF_K, 1);    // 7
+  const CodeGen::Node ret_zero = MakeInstruction(BPF_RET + BPF_K, 0);   // 6
+  const CodeGen::Node last_branch =                                     // 5
+      MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, ret_zero, ret_one);
+  const CodeGen::Node second_ret42 =                                    // 4
+      MakeInstruction(BPF_RET + BPF_K, 42);
+  const CodeGen::Node middle_branch =                                   // 3
+      MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, second_ret42, last_branch);
+  const CodeGen::Node first_ret42 =                                     // 2
+      MakeInstruction(BPF_RET + BPF_K, 42);
+  const CodeGen::Node first_branch =                                    // 1
+      MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, first_ret42,
+                      middle_branch);
+  const CodeGen::Node entry =                                           // 0
+      MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, first_branch);
+
+  RunTest(entry);
+}
+
+TEST_F(ProgramTest, InstructionFolding) {
+  // Asking for an identical simple instruction again must return the node
+  // created the first time, even with other instructions created in between.
+  const CodeGen::Node ret_zero = MakeInstruction(BPF_RET + BPF_K, 0);
+  EXPECT_EQ(ret_zero, MakeInstruction(BPF_RET + BPF_K, 0));
+  const CodeGen::Node ret_one = MakeInstruction(BPF_RET + BPF_K, 1);
+  EXPECT_EQ(ret_zero, MakeInstruction(BPF_RET + BPF_K, 0));
+  EXPECT_EQ(ret_one, MakeInstruction(BPF_RET + BPF_K, 1));
+  EXPECT_EQ(ret_one, MakeInstruction(BPF_RET + BPF_K, 1));
+
+  // The same holds for a multi-instruction sequence rebuilt from scratch.
+  const CodeGen::Node load_then_test = MakeInstruction(
+      BPF_LD + BPF_W + BPF_ABS, 0,
+      MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, ret_zero, ret_one));
+  EXPECT_EQ(load_then_test,
+            MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0,
+                            MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100,
+                                            ret_zero, ret_one)));
+
+  RunTest(load_then_test);
+}
+
+TEST_F(ProgramTest, FarBranches) {
+  // Branch offsets in BPF are 8-bit fields, so a branch target must lie
+  // within 255 instructions of the branch. CodeGen hides this by inserting
+  // jump instructions where needed; this test builds programs whose branch
+  // distances straddle that limit.
+
+  // Start with a straight-line chain of 260 instructions.
+  std::vector<CodeGen::Node> chain;
+  chain.push_back(MakeInstruction(BPF_RET + BPF_K, 0));
+  for (size_t n = 1; n < 260; ++n) {
+    const CodeGen::Node previous = chain.back();
+    chain.push_back(MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, n, previous));
+  }
+
+  // Try every pairing of true/false distances around the 255 limit. The
+  // distances are counted back from the newest entry in |chain|.
+  for (size_t jt = 250; jt < 260; ++jt) {
+    for (size_t jf = 250; jf < 260; ++jf) {
+      const CodeGen::Node true_target = chain.rbegin()[jt];
+      const CodeGen::Node false_target = chain.rbegin()[jf];
+      const CodeGen::Node branch = MakeInstruction(
+          BPF_JMP + BPF_JEQ + BPF_K, 0, true_target, false_target);
+      chain.push_back(branch);
+      RunTest(branch);
+    }
+  }
+}
+
+TEST_F(ProgramTest, JumpReuse) {
+  // To keep programs small, CodeGen reuses a far jump it has already emitted
+  // instead of emitting another one to the same target. This test checks
+  // that the reuse actually happens.
+  //
+  // NOTE: For simplicity the checks below depend on CodeGen::Node values
+  // being vector indices. That is an implementation detail; ordinary CodeGen
+  // users should treat nodes as opaque.
+
+  // Start with a straight-line chain of 260 instructions.
+  std::vector<CodeGen::Node> chain;
+  chain.push_back(MakeInstruction(BPF_RET + BPF_K, 0));
+  for (size_t n = 1; n < 260; ++n) {
+    const CodeGen::Node previous = chain.back();
+    chain.push_back(MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, n, previous));
+  }
+  const CodeGen::Node newest = chain.back();
+
+  // A branch to chain[0] and chain[1] costs three new instructions: one far
+  // jump per target plus the branch itself.
+  const CodeGen::Node first_branch =
+      MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, chain[0], chain[1]);
+  EXPECT_EQ(newest + 3, first_branch);  // XXX: Implementation detail!
+  RunTest(first_branch);
+
+  // A second branch to the same targets costs only one new instruction,
+  // because both far jumps from the first branch can be reused.
+  const CodeGen::Node second_branch =
+      MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, chain[0], chain[1]);
+  EXPECT_EQ(first_branch + 1, second_branch);  // XXX: Implementation detail!
+  RunTest(second_branch);
+}
+
+} // namespace
+} // namespace sandbox
diff --git a/sandbox/linux/bpf_dsl/dump_bpf.cc b/sandbox/linux/bpf_dsl/dump_bpf.cc
new file mode 100644
index 0000000000..d0c8f75073
--- /dev/null
+++ b/sandbox/linux/bpf_dsl/dump_bpf.cc
@@ -0,0 +1,109 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/bpf_dsl/dump_bpf.h"
+
+#include <stdio.h>
+
+#include "sandbox/linux/bpf_dsl/codegen.h"
+#include "sandbox/linux/bpf_dsl/trap_registry.h"
+#include "sandbox/linux/system_headers/linux_filter.h"
+#include "sandbox/linux/system_headers/linux_seccomp.h"
+
+namespace sandbox {
+namespace bpf_dsl {
+
+// Writes a human-readable listing of |program| to stderr, one line per
+// instruction, each prefixed with the instruction's index in the program.
+//
+// Only the forms handled below are decoded (absolute word loads, jumps,
+// returns and ALU operations); anything else is printed as "???".
+//
+// Every printf argument is given the exact type its conversion specifier
+// expects (%d <- int, %x <- unsigned int); the emitted text is unchanged.
+void DumpBPF::PrintProgram(const CodeGen::Program& program) {
+  int ip = 0;
+  for (const auto& insn : program) {
+    fprintf(stderr, "%3d) ", ip);
+    switch (BPF_CLASS(insn.code)) {
+      case BPF_LD:
+        if (insn.code == BPF_LD + BPF_W + BPF_ABS) {
+          fprintf(stderr, "LOAD %d // ", static_cast<int>(insn.k));
+          const auto args_offset = offsetof(struct arch_seccomp_data, args);
+          const auto ip_offset =
+              offsetof(struct arch_seccomp_data, instruction_pointer);
+          if (insn.k == offsetof(struct arch_seccomp_data, nr)) {
+            fprintf(stderr, "System call number\n");
+          } else if (insn.k == offsetof(struct arch_seccomp_data, arch)) {
+            fprintf(stderr, "Architecture\n");
+          } else if (insn.k == ip_offset) {
+            fprintf(stderr, "Instruction pointer (LSB)\n");
+          } else if (insn.k == ip_offset + 4) {
+            fprintf(stderr, "Instruction pointer (MSB)\n");
+          } else if (insn.k >= args_offset && insn.k < args_offset + 48 &&
+                     (insn.k - args_offset) % 4 == 0) {
+            // A 4-byte-aligned load inside the 48 bytes starting at |args|.
+            // The offset divided by 8 is printed as the argument index, and
+            // the half at +4 within each 8 bytes is labelled "MSB".
+            const auto arg_byte = insn.k - args_offset;
+            fprintf(stderr, "Argument %d (%cSB)\n",
+                    static_cast<int>(arg_byte) / 8,
+                    arg_byte % 8 ? 'M' : 'L');
+          } else {
+            fprintf(stderr, "???\n");
+          }
+        } else {
+          fprintf(stderr, "LOAD ???\n");
+        }
+        break;
+      case BPF_JMP:
+        if (BPF_OP(insn.code) == BPF_JA) {
+          // |k| is unsigned, so the sum is unsigned; convert it for %d.
+          fprintf(stderr, "JMP %d\n", static_cast<int>(ip + insn.k + 1));
+        } else {
+          const char* const cmp =
+              BPF_OP(insn.code) == BPF_JSET ? "&" :
+              BPF_OP(insn.code) == BPF_JEQ  ? "==" :
+              BPF_OP(insn.code) == BPF_JGE  ? ">=" :
+              BPF_OP(insn.code) == BPF_JGT  ? ">" : "???";
+          // Jump targets are relative to the instruction after this one.
+          fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
+                  cmp, insn.k, ip + insn.jt + 1, ip + insn.jf + 1);
+        }
+        break;
+      case BPF_RET:
+        fprintf(stderr, "RET 0x%x // ", insn.k);
+        if ((insn.k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) {
+          fprintf(stderr, "Trap #%d\n",
+                  static_cast<int>(insn.k & SECCOMP_RET_DATA));
+        } else if ((insn.k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
+          fprintf(stderr, "errno = %d\n",
+                  static_cast<int>(insn.k & SECCOMP_RET_DATA));
+        } else if ((insn.k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRACE) {
+          fprintf(stderr, "Trace #%d\n",
+                  static_cast<int>(insn.k & SECCOMP_RET_DATA));
+        } else if (insn.k == SECCOMP_RET_ALLOW) {
+          fprintf(stderr, "Allowed\n");
+        } else {
+          fprintf(stderr, "???\n");
+        }
+        break;
+      case BPF_ALU:
+        if (BPF_OP(insn.code) == BPF_NEG) {
+          fprintf(stderr, "A := -A\n");
+        } else {
+          const char* const op =
+              BPF_OP(insn.code) == BPF_ADD ? "+" :
+              BPF_OP(insn.code) == BPF_SUB ? "-" :
+              BPF_OP(insn.code) == BPF_MUL ? "*" :
+              BPF_OP(insn.code) == BPF_DIV ? "/" :
+              BPF_OP(insn.code) == BPF_MOD ? "%" :
+              BPF_OP(insn.code) == BPF_OR  ? "|" :
+              BPF_OP(insn.code) == BPF_XOR ? "^" :
+              BPF_OP(insn.code) == BPF_AND ? "&" :
+              BPF_OP(insn.code) == BPF_LSH ? "<<" :
+              BPF_OP(insn.code) == BPF_RSH ? ">>" : "???";
+          fprintf(stderr, "A := A %s 0x%x\n", op, insn.k);
+        }
+        break;
+      default:
+        fprintf(stderr, "???\n");
+        break;
+    }
+    ++ip;
+  }
+}
+
+} // namespace bpf_dsl
+} // namespace sandbox
diff --git a/sandbox/linux/bpf_dsl/dump_bpf.h b/sandbox/linux/bpf_dsl/dump_bpf.h
new file mode 100644
index 0000000000..cd12be793d
--- /dev/null
+++ b/sandbox/linux/bpf_dsl/dump_bpf.h
@@ -0,0 +1,18 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_BPF_DSL_DUMP_BPF_H_
+#define SANDBOX_LINUX_BPF_DSL_DUMP_BPF_H_
+
+#include "sandbox/linux/bpf_dsl/codegen.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+namespace bpf_dsl {
+
+// DumpBPF holds debugging helpers for inspecting BPF programs.
+//
+// The include guard around this header keeps the class from being redefined
+// when the header is included more than once in a translation unit.
+class SANDBOX_EXPORT DumpBPF {
+ public:
+  // PrintProgram writes |program| in a human-readable format to stderr.
+  static void PrintProgram(const CodeGen::Program& program);
+};
+
+}  // namespace bpf_dsl
+}  // namespace sandbox
+
+#endif  // SANDBOX_LINUX_BPF_DSL_DUMP_BPF_H_
diff --git a/sandbox/linux/bpf_dsl/policy.cc b/sandbox/linux/bpf_dsl/policy.cc
new file mode 100644
index 0000000000..c20edc6da8
--- /dev/null
+++ b/sandbox/linux/bpf_dsl/policy.cc
@@ -0,0 +1,19 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/bpf_dsl/policy.h"
+
+#include <errno.h>
+
+#include "sandbox/linux/bpf_dsl/bpf_dsl.h"
+
+namespace sandbox {
+namespace bpf_dsl {
+
+// Default handling for invalid system call numbers: fail with ENOSYS.
+ResultExpr Policy::InvalidSyscall() const {
+  ResultExpr enosys = Error(ENOSYS);
+  return enosys;
+}
+
+} // namespace bpf_dsl
+} // namespace sandbox
diff --git a/sandbox/linux/bpf_dsl/policy.h b/sandbox/linux/bpf_dsl/policy.h
new file mode 100644
index 0000000000..6c67589456
--- /dev/null
+++ b/sandbox/linux/bpf_dsl/policy.h
@@ -0,0 +1,37 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_BPF_DSL_POLICY_H_
+#define SANDBOX_LINUX_BPF_DSL_POLICY_H_
+
+#include "base/macros.h"
+#include "sandbox/linux/bpf_dsl/bpf_dsl_forward.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+namespace bpf_dsl {
+
+// Interface to implement to define a BPF sandbox policy.
+class SANDBOX_EXPORT Policy {
+ public:
+ Policy() {}
+ // Virtual so that concrete policies can be destroyed through a Policy*.
+ virtual ~Policy() {}
+
+ // User extension point for writing custom sandbox policies.
+ // The returned ResultExpr will control how the kernel responds to the
+ // specified system call number.
+ virtual ResultExpr EvaluateSyscall(int sysno) const = 0;
+
+ // Optional override for specifying alternate behavior for invalid
+ // system calls. The default is to return ENOSYS.
+ virtual ResultExpr InvalidSyscall() const;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Policy);
+};
+
+} // namespace bpf_dsl
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_BPF_DSL_POLICY_H_
diff --git a/sandbox/linux/bpf_dsl/policy_compiler.cc b/sandbox/linux/bpf_dsl/policy_compiler.cc
new file mode 100644
index 0000000000..f38232f85f
--- /dev/null
+++ b/sandbox/linux/bpf_dsl/policy_compiler.cc
@@ -0,0 +1,499 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/bpf_dsl/policy_compiler.h"
+
+#include <errno.h>
+#include <sys/syscall.h>
+
+#include <limits>
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "sandbox/linux/bpf_dsl/bpf_dsl.h"
+#include "sandbox/linux/bpf_dsl/bpf_dsl_impl.h"
+#include "sandbox/linux/bpf_dsl/codegen.h"
+#include "sandbox/linux/bpf_dsl/dump_bpf.h"
+#include "sandbox/linux/bpf_dsl/policy.h"
+#include "sandbox/linux/bpf_dsl/seccomp_macros.h"
+#include "sandbox/linux/bpf_dsl/syscall_set.h"
+#include "sandbox/linux/bpf_dsl/verifier.h"
+#include "sandbox/linux/seccomp-bpf/errorcode.h"
+#include "sandbox/linux/system_headers/linux_filter.h"
+#include "sandbox/linux/system_headers/linux_seccomp.h"
+#include "sandbox/linux/system_headers/linux_syscalls.h"
+
+namespace sandbox {
+namespace bpf_dsl {
+
+namespace {
+
+// True when compiling for an Intel architecture (i386 or x86-64).
+const bool kIsIntel =
+#if defined(__i386__) || defined(__x86_64__)
+    true;
+#else
+    false;
+#endif
+
+// True when compiling for the x32 ABI (x86-64 with 32-bit long and pointers).
+const bool kIsX32 =
+#if defined(__x86_64__) && defined(__ILP32__)
+    true;
+#else
+    false;
+#endif
+
+// System calls that a policy using UnsafeTrap() must unconditionally allow;
+// Compile() CHECKs this for every entry. The non-"rt_" variants are listed
+// only where the platform headers define them.
+const int kSyscallsRequiredForUnsafeTraps[] = {
+ __NR_rt_sigprocmask,
+ __NR_rt_sigreturn,
+#if defined(__NR_sigprocmask)
+ __NR_sigprocmask,
+#endif
+#if defined(__NR_sigreturn)
+ __NR_sigreturn,
+#endif
+};
+
+// Returns true iff exactly one bit of |x| is set (i.e. |x| is a power of two).
+bool HasExactlyOneBit(uint64_t x) {
+  if (x == 0) {
+    return false;
+  }
+  // Clearing the lowest set bit leaves zero only if it was the sole bit;
+  // see http://stackoverflow.com/a/108329.
+  const uint64_t without_lowest_bit = x & (x - 1);
+  return without_lowest_bit == 0;
+}
+
+// A Trap() handler that returns an "errno" value. The value is encoded
+// in the "aux" parameter.
+intptr_t ReturnErrno(const struct arch_seccomp_data&, void* aux) {
+  // The errno value travels in the low SECCOMP_RET_DATA bits of |aux|.
+  const int errno_value =
+      reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;
+
+  // Follow the native kernel convention for TrapFnc functions: report the
+  // failure as a negative exit code (range -1..-4096) and leave errno alone.
+  // The glibc wrapper that triggered the SIGSYS will ultimately set errno
+  // for us.
+  return -errno_value;
+}
+
+// Returns true if |policy| yields a result that uses unsafe traps for any
+// valid system call number, or for invalid system calls.
+bool HasUnsafeTraps(const Policy* policy) {
+  DCHECK(policy);
+  for (uint32_t sysnum : SyscallSet::ValidOnly()) {
+    const bool uses_unsafe_trap =
+        policy->EvaluateSyscall(sysnum)->HasUnsafeTraps();
+    if (uses_unsafe_trap) {
+      return true;
+    }
+  }
+  // No valid syscall needs them; the invalid-syscall result decides.
+  const bool invalid_uses_unsafe_trap =
+      policy->InvalidSyscall()->HasUnsafeTraps();
+  return invalid_uses_unsafe_trap;
+}
+
+} // namespace
+
+// A run of consecutive system call numbers that all compile to the same
+// node. Only the start is stored; FindRanges() appends ranges in iteration
+// order, so a range extends up to the next range's |from|.
+// Initialised positionally (Range{from, node}), so keep the field order.
+struct PolicyCompiler::Range {
+ uint32_t from; // First system call number covered by this range.
+ CodeGen::Node node; // Compiled result for the numbers in this range.
+};
+
+// Stores the raw |policy| and |registry| pointers and starts with an escape
+// PC of 0. |has_unsafe_traps_| is computed eagerly here by evaluating
+// |policy| for every valid system call number and for invalid system calls.
+PolicyCompiler::PolicyCompiler(const Policy* policy, TrapRegistry* registry)
+ : policy_(policy),
+ registry_(registry),
+ escapepc_(0),
+ conds_(),
+ gen_(),
+ has_unsafe_traps_(HasUnsafeTraps(policy_)) {
+ // HasUnsafeTraps() above has already DCHECKed and dereferenced |policy| by
+ // the time this runs.
+ DCHECK(policy);
+}
+
+// Nothing to release explicitly; members clean up after themselves.
+PolicyCompiler::~PolicyCompiler() = default;
+
+// Compiles the policy into a BPF program and returns it.
+//
+// CHECK-fails if the policy does not deny invalid system calls or, when the
+// policy uses unsafe traps, if no escape PC was set, a required system call
+// is not unconditionally allowed, or the registry refuses to enable unsafe
+// traps. If |verify| is true, the program is checked with
+// Verifier::VerifyBPF(); on failure it is dumped to stderr and the process
+// aborts via LOG(FATAL).
+scoped_ptr<CodeGen::Program> PolicyCompiler::Compile(bool verify) {
+ CHECK(policy_->InvalidSyscall()->IsDeny())
+ << "Policies should deny invalid system calls";
+
+ // If our BPF program has unsafe traps, enable support for them.
+ if (has_unsafe_traps_) {
+ CHECK_NE(0U, escapepc_) << "UnsafeTrap() requires a valid escape PC";
+
+ for (int sysnum : kSyscallsRequiredForUnsafeTraps) {
+ CHECK(policy_->EvaluateSyscall(sysnum)->IsAllow())
+ << "Policies that use UnsafeTrap() must unconditionally allow all "
+ "required system calls";
+ }
+
+ CHECK(registry_->EnableUnsafeTraps())
+ << "We'd rather die than enable unsafe traps";
+ }
+
+ // Assemble the BPF filter program.
+ scoped_ptr<CodeGen::Program> program(new CodeGen::Program());
+ gen_.Compile(AssemblePolicy(), program.get());
+
+ // Make sure compilation resulted in a BPF program that executes
+ // correctly. Otherwise, there is an internal error in our BPF compiler.
+ // There is really nothing the caller can do until the bug is fixed.
+ if (verify) {
+ const char* err = nullptr;
+ if (!Verifier::VerifyBPF(this, *program, *policy_, &err)) {
+ DumpBPF::PrintProgram(*program);
+ LOG(FATAL) << err;
+ }
+ }
+
+ return program.Pass();
+}
+
+// Records the instruction pointer from which system calls are allowed
+// unconditionally when the policy uses unsafe traps. Compile() requires it
+// to be non-zero in that case.
+void PolicyCompiler::DangerousSetEscapePC(uint64_t escapepc) {
+ escapepc_ = escapepc;
+}
+
+CodeGen::Node PolicyCompiler::AssemblePolicy() {
+  // The program has three logical parts. Each part needs the node it hands
+  // control to, so they are built from the last part to the first:
+  //   3. Check the system call number and jump to the appropriate compiled
+  //      system call policy.
+  const CodeGen::Node dispatch = DispatchSyscall();
+  //   2. If the policy involves unsafe traps, check if the syscall was
+  //      invoked by Syscall::Call, and then allow it unconditionally.
+  const CodeGen::Node after_arch_check = MaybeAddEscapeHatch(dispatch);
+  //   1. Check that the "arch" field matches the expected architecture.
+  return CheckArch(after_arch_check);
+}
+
+CodeGen::Node PolicyCompiler::CheckArch(CodeGen::Node passed) {
+  // A system call made under any architecture other than SECCOMP_ARCH is
+  // disallowed.
+  const CodeGen::Node wrong_arch =
+      CompileResult(Kill("Invalid audit architecture in BPF filter"));
+
+  // JEQ SECCOMP_ARCH, passed, wrong_arch
+  const CodeGen::Node compare_arch = gen_.MakeInstruction(
+      BPF_JMP + BPF_JEQ + BPF_K, SECCOMP_ARCH, passed, wrong_arch);
+
+  // LDW [arch], then fall into the comparison.
+  return gen_.MakeInstruction(
+      BPF_LD + BPF_W + BPF_ABS, SECCOMP_ARCH_IDX, compare_arch);
+}
+
+CodeGen::Node PolicyCompiler::MaybeAddEscapeHatch(CodeGen::Node rest) {
+  if (!has_unsafe_traps_) {
+    // Without unsafe traps there is no escape hatch to add.
+    return rest;
+  }
+
+  // Compile already enabled unsafe traps; enabling them again here gives the
+  // trap registry a second chance to complain before we add the backdoor.
+  CHECK(registry_->EnableUnsafeTraps());
+
+  // System calls are allowed if they originate from our magic return
+  // address. BPF cannot do native 64-bit comparisons, so the instruction
+  // pointer is compared as two 32-bit halves. For simplicity, the full
+  // 64-bit instruction pointer is checked even on 32-bit architectures.
+  const uint32_t lopc = static_cast<uint32_t>(escapepc_);
+  const uint32_t hipc = static_cast<uint32_t>(escapepc_ >> 32);
+
+  // Built from the last instruction to the first:
+  //   LDW [ip.lsb]
+  //   JEQ lopc, (next), rest
+  //   LDW [ip.msb]
+  //   JEQ hipc, allow, rest
+  // If either half differs, we continue evaluating the rest of the sandbox
+  // policy.
+  const CodeGen::Node allow = CompileResult(Allow());
+  const CodeGen::Node compare_msb =
+      gen_.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, hipc, allow, rest);
+  const CodeGen::Node load_msb = gen_.MakeInstruction(
+      BPF_LD + BPF_W + BPF_ABS, SECCOMP_IP_MSB_IDX, compare_msb);
+  const CodeGen::Node compare_lsb =
+      gen_.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, lopc, load_msb, rest);
+  return gen_.MakeInstruction(
+      BPF_LD + BPF_W + BPF_ABS, SECCOMP_IP_LSB_IDX, compare_lsb);
+}
+
+CodeGen::Node PolicyCompiler::DispatchSyscall() {
+  // Group all system call numbers into ranges that share a compiled result.
+  Ranges ranges;
+  FindRanges(&ranges);
+
+  // Turn the ranges into a jump table that binary-searches over them.
+  const CodeGen::Node jumptable =
+      AssembleJumpTable(ranges.begin(), ranges.end());
+
+  // Validate the system call number before running the jump table...
+  const CodeGen::Node checked_dispatch = CheckSyscallNumber(jumptable);
+
+  // ...which first requires loading it.
+  return gen_.MakeInstruction(
+      BPF_LD + BPF_W + BPF_ABS, SECCOMP_NR_IDX, checked_dispatch);
+}
+
+CodeGen::Node PolicyCompiler::CheckSyscallNumber(CodeGen::Node passed) {
+  if (!kIsIntel) {
+    // TODO(mdempsky): Similar validation for other architectures?
+    return passed;
+  }
+
+  // On Intel architectures, verify that system call numbers are in the
+  // expected number range.
+  const CodeGen::Node invalidX32 =
+      CompileResult(Kill("Illegal mixing of system call ABIs"));
+
+  // The newer x32 API always sets bit 30, while the older i386 and x86-64
+  // APIs clear bit 30 on all system calls. Whichever state does not belong
+  // to the ABI we were built for is rejected.
+  const CodeGen::Node bit30_set = kIsX32 ? passed : invalidX32;
+  const CodeGen::Node bit30_clear = kIsX32 ? invalidX32 : passed;
+  return gen_.MakeInstruction(
+      BPF_JMP + BPF_JSET + BPF_K, 0x40000000, bit30_set, bit30_clear);
+}
+
+void PolicyCompiler::FindRanges(Ranges* ranges) {
+  // "struct seccomp_data" defines system call numbers as a signed int32_t,
+  // but BPF instructions always operate on unsigned quantities. We therefore
+  // walk the unsigned numbers produced by SyscallSet::All(); every number
+  // that is not valid shares the single |invalid_node| result.
+  const CodeGen::Node invalid_node = CompileResult(policy_->InvalidSyscall());
+
+  // Compiles the policy's result for |num|, or reuses |invalid_node|.
+  const auto node_for = [this, invalid_node](uint32_t num) -> CodeGen::Node {
+    if (!SyscallSet::IsValid(num)) {
+      return invalid_node;
+    }
+    return CompileResult(policy_->EvaluateSyscall(static_cast<int>(num)));
+  };
+
+  uint32_t range_start = 0;
+  CodeGen::Node range_node = node_for(range_start);
+
+  for (uint32_t sysnum : SyscallSet::All()) {
+    const CodeGen::Node node = node_for(sysnum);
+    // N.B., here we rely on CodeGen folding (i.e., returning the same
+    // node value for) identical code sequences, otherwise our jump
+    // table will blow up in size.
+    if (node == range_node) {
+      continue;
+    }
+    // The result changed: close the current range and open a new one.
+    ranges->push_back(Range{range_start, range_node});
+    range_start = sysnum;
+    range_node = node;
+  }
+
+  // Close the final range.
+  ranges->push_back(Range{range_start, range_node});
+}
+
+CodeGen::Node PolicyCompiler::AssembleJumpTable(Ranges::const_iterator start,
+                                                Ranges::const_iterator stop) {
+  // The list of system call ranges is converted into a jump table that
+  // performs a binary search over the ranges. At least one range is needed
+  // to be able to build it.
+  CHECK(start < stop) << "Invalid iterator range";
+
+  const auto count = stop - start;
+  if (count == 1) {
+    // A single range is left, so the BPF filter program can return its
+    // result.
+    return start->node;
+  }
+
+  // Split at the middle range and compare the system call number against
+  // that range's lowest number: anything lower belongs to the first half,
+  // anything greater or equal to the second half.
+  const Ranges::const_iterator pivot = start + count / 2;
+
+  // Recurse into both halves (lower half first).
+  const CodeGen::Node lower_half = AssembleJumpTable(start, pivot);
+  const CodeGen::Node upper_half = AssembleJumpTable(pivot, stop);
+  return gen_.MakeInstruction(
+      BPF_JMP + BPF_JGE + BPF_K, pivot->from, upper_half, lower_half);
+}
+
+// Compiles |res| to an ErrorCode and then emits the instructions for it.
+CodeGen::Node PolicyCompiler::CompileResult(const ResultExpr& res) {
+  const ErrorCode compiled = res->Compile(this);
+  return RetExpression(compiled);
+}
+
+// Emits the instructions that make the program produce |err|: a conditional
+// expression for ET_COND, or a single RET for ET_SIMPLE and ET_TRAP. Any
+// other error type is fatal.
+CodeGen::Node PolicyCompiler::RetExpression(const ErrorCode& err) {
+  const auto type = err.error_type();
+  if (type == ErrorCode::ET_COND) {
+    return CondExpression(err);
+  }
+  if (type == ErrorCode::ET_SIMPLE || type == ErrorCode::ET_TRAP) {
+    return gen_.MakeInstruction(BPF_RET + BPF_K, err.err());
+  }
+  LOG(FATAL)
+      << "ErrorCode is not suitable for returning from a BPF program";
+  return CodeGen::kNullNode;
+}
+
+CodeGen::Node PolicyCompiler::CondExpression(const ErrorCode& cond) {
+  // Validate |cond| before emitting anything for it.
+  CHECK(cond.argno_ >= 0 && cond.argno_ < 6) << "Invalid argument number "
+                                             << cond.argno_;
+  CHECK(cond.width_ == ErrorCode::TP_32BIT ||
+        cond.width_ == ErrorCode::TP_64BIT)
+      << "Invalid argument width " << cond.width_;
+  CHECK_NE(0U, cond.mask_) << "Zero mask is invalid";
+  CHECK_EQ(cond.value_, cond.value_ & cond.mask_)
+      << "Value contains masked out bits";
+  if (sizeof(void*) == 4) {
+    CHECK_EQ(ErrorCode::TP_32BIT, cond.width_)
+        << "Invalid width on 32-bit platform";
+  }
+  if (cond.width_ == ErrorCode::TP_32BIT) {
+    CHECK_EQ(0U, cond.mask_ >> 32) << "Mask exceeds argument size";
+    CHECK_EQ(0U, cond.value_ >> 32) << "Value exceeds argument size";
+  }
+
+  const CodeGen::Node passed = RetExpression(*cond.passed_);
+  const CodeGen::Node failed = RetExpression(*cond.failed_);
+
+  // The goal is "(arg & mask) == value" for 64-bit arg, mask and value, but
+  // the BPF machine is only 32-bit. So each 32-bit half gets its own test:
+  // the upper half runs first and continues into the lower half's test, which
+  // continues to |passed|. A mismatch in either half goes to |failed|.
+  const CodeGen::Node lower_test =
+      CondExpressionHalf(cond, LowerHalf, passed, failed);
+  return CondExpressionHalf(cond, UpperHalf, lower_test, failed);
+}
+
+// Emits the test for one 32-bit half (|half|) of "(arg & mask) == value" for
+// the argument described by |cond|. Control continues to |passed| when this
+// half matches and to |failed| when it does not.
+//
+// For 32-bit-wide arguments the upper half is not compared against the mask
+// and value; it is instead sanity-checked, and an unexpected upper half goes
+// to the Unexpected64bitArgument() result rather than to |failed|.
+CodeGen::Node PolicyCompiler::CondExpressionHalf(const ErrorCode& cond,
+ ArgHalf half,
+ CodeGen::Node passed,
+ CodeGen::Node failed) {
+ if (cond.width_ == ErrorCode::TP_32BIT && half == UpperHalf) {
+ // Special logic for sanity checking the upper 32-bits of 32-bit system
+ // call arguments.
+
+ // TODO(mdempsky): Compile Unexpected64bitArgument() just per program.
+ CodeGen::Node invalid_64bit = RetExpression(Unexpected64bitArgument());
+
+ const uint32_t upper = SECCOMP_ARG_MSB_IDX(cond.argno_);
+ const uint32_t lower = SECCOMP_ARG_LSB_IDX(cond.argno_);
+
+ if (sizeof(void*) == 4) {
+ // On 32-bit platforms, the upper 32-bits should always be 0:
+ // LDW [upper]
+ // JEQ 0, passed, invalid
+ return gen_.MakeInstruction(
+ BPF_LD + BPF_W + BPF_ABS,
+ upper,
+ gen_.MakeInstruction(
+ BPF_JMP + BPF_JEQ + BPF_K, 0, passed, invalid_64bit));
+ }
+
+ // On 64-bit platforms, the upper 32-bits may be 0 or ~0; but we only allow
+ // ~0 if the sign bit of the lower 32-bits is set too:
+ // LDW [upper]
+ // JEQ 0, passed, (next)
+ // JEQ ~0, (next), invalid
+ // LDW [lower]
+ // JSET (1<<31), passed, invalid
+ //
+ // TODO(mdempsky): The JSET instruction could perhaps jump to passed->next
+ // instead, as the first instruction of passed should be "LDW [lower]".
+ return gen_.MakeInstruction(
+ BPF_LD + BPF_W + BPF_ABS,
+ upper,
+ gen_.MakeInstruction(
+ BPF_JMP + BPF_JEQ + BPF_K,
+ 0,
+ passed,
+ gen_.MakeInstruction(
+ BPF_JMP + BPF_JEQ + BPF_K,
+ std::numeric_limits<uint32_t>::max(),
+ gen_.MakeInstruction(
+ BPF_LD + BPF_W + BPF_ABS,
+ lower,
+ gen_.MakeInstruction(BPF_JMP + BPF_JSET + BPF_K,
+ 1U << 31,
+ passed,
+ invalid_64bit)),
+ invalid_64bit)));
+ }
+
+ // Select the load offset and the 32 bits of mask and value that belong to
+ // the requested half (the assignments below truncate to 32 bits).
+ const uint32_t idx = (half == UpperHalf) ? SECCOMP_ARG_MSB_IDX(cond.argno_)
+ : SECCOMP_ARG_LSB_IDX(cond.argno_);
+ const uint32_t mask = (half == UpperHalf) ? cond.mask_ >> 32 : cond.mask_;
+ const uint32_t value = (half == UpperHalf) ? cond.value_ >> 32 : cond.value_;
+
+ // Emit a suitable instruction sequence for (arg & mask) == value.
+
+ // For (arg & 0) == 0, just return passed.
+ if (mask == 0) {
+ CHECK_EQ(0U, value);
+ return passed;
+ }
+
+ // For (arg & ~0) == value, emit:
+ // LDW [idx]
+ // JEQ value, passed, failed
+ if (mask == std::numeric_limits<uint32_t>::max()) {
+ return gen_.MakeInstruction(
+ BPF_LD + BPF_W + BPF_ABS,
+ idx,
+ gen_.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, value, passed, failed));
+ }
+
+ // For (arg & mask) == 0, emit:
+ // LDW [idx]
+ // JSET mask, failed, passed
+ // (Note: failed and passed are intentionally swapped.)
+ if (value == 0) {
+ return gen_.MakeInstruction(
+ BPF_LD + BPF_W + BPF_ABS,
+ idx,
+ gen_.MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, mask, failed, passed));
+ }
+
+ // For (arg & x) == x where x is a single-bit value, emit:
+ // LDW [idx]
+ // JSET mask, passed, failed
+ if (mask == value && HasExactlyOneBit(mask)) {
+ return gen_.MakeInstruction(
+ BPF_LD + BPF_W + BPF_ABS,
+ idx,
+ gen_.MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, mask, passed, failed));
+ }
+
+ // Generic fallback:
+ // LDW [idx]
+ // AND mask
+ // JEQ value, passed, failed
+ return gen_.MakeInstruction(
+ BPF_LD + BPF_W + BPF_ABS,
+ idx,
+ gen_.MakeInstruction(
+ BPF_ALU + BPF_AND + BPF_K,
+ mask,
+ gen_.MakeInstruction(
+ BPF_JMP + BPF_JEQ + BPF_K, value, passed, failed)));
+}
+
+// Compiles the Kill() result used when a 32-bit system call argument turns
+// out to carry unexpected upper bits.
+ErrorCode PolicyCompiler::Unexpected64bitArgument() {
+  const ResultExpr kill = Kill("Unexpected 64bit argument detected");
+  return kill->Compile(this);
+}
+
+ErrorCode PolicyCompiler::Error(int err) {
+  if (!has_unsafe_traps_) {
+    // Common case: let the kernel-side filter return the errno directly.
+    return ErrorCode(err);
+  }
+
+  // When inside an UnsafeTrap() callback, all system calls have to be
+  // allowed, i.e. the sandbox must be conditionally disabled. Kernel-side BPF
+  // filters cannot do that, as they cannot inspect any state other than the
+  // syscall arguments. Redirecting all error handlers to user-space makes
+  // that decision easy.
+  // The extra round-trip to user-space is not that costly: it is only ever
+  // paid for denied system calls, and a typical program has very few of
+  // these.
+  return Trap(ReturnErrno, reinterpret_cast<void*>(err), true);
+}
+
+// Registers the handler |fnc| (with its |aux| and |safe| settings) with the
+// trap registry and wraps the id it returns in an ErrorCode.
+ErrorCode PolicyCompiler::Trap(TrapRegistry::TrapFnc fnc,
+                               const void* aux,
+                               bool safe) {
+  return ErrorCode(static_cast<uint16_t>(registry_->Add(fnc, aux, safe)),
+                   fnc,
+                   aux,
+                   safe);
+}
+
+// Returns true if |sysno| is listed in kSyscallsRequiredForUnsafeTraps.
+bool PolicyCompiler::IsRequiredForUnsafeTrap(int sysno) {
+  for (int required : kSyscallsRequiredForUnsafeTraps) {
+    if (required == sysno) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Builds a conditional ErrorCode for "(argument |argno| & |mask|) == |value|"
+// that yields |passed| or |failed|. Both outcomes are stored in |conds_| and
+// the new ErrorCode refers to the stored elements.
+ErrorCode PolicyCompiler::CondMaskedEqual(int argno,
+                                          ErrorCode::ArgType width,
+                                          uint64_t mask,
+                                          uint64_t value,
+                                          const ErrorCode& passed,
+                                          const ErrorCode& failed) {
+  // insert() returns the already-present element if an equal one exists.
+  const ErrorCode* const stored_passed = &*conds_.insert(passed).first;
+  const ErrorCode* const stored_failed = &*conds_.insert(failed).first;
+  return ErrorCode(argno, width, mask, value, stored_passed, stored_failed);
+}
+
+} // namespace bpf_dsl
+} // namespace sandbox
diff --git a/sandbox/linux/bpf_dsl/policy_compiler.h b/sandbox/linux/bpf_dsl/policy_compiler.h
new file mode 100644
index 0000000000..df38d4ccbc
--- /dev/null
+++ b/sandbox/linux/bpf_dsl/policy_compiler.h
@@ -0,0 +1,159 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_BPF_DSL_POLICY_COMPILER_H_
+#define SANDBOX_LINUX_BPF_DSL_POLICY_COMPILER_H_
+
+#include <stdint.h>
+
+#include <map>
+#include <set>
+#include <vector>
+
+#include "base/macros.h"
+#include "base/memory/scoped_ptr.h"
+#include "sandbox/linux/bpf_dsl/bpf_dsl_forward.h"
+#include "sandbox/linux/bpf_dsl/codegen.h"
+#include "sandbox/linux/seccomp-bpf/errorcode.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+namespace bpf_dsl {
+class Policy;
+
+// PolicyCompiler implements the bpf_dsl compiler, allowing users to
+// transform bpf_dsl policies into BPF programs to be executed by the
+// Linux kernel.
+class SANDBOX_EXPORT PolicyCompiler {
+ public:
+ PolicyCompiler(const Policy* policy, TrapRegistry* registry);
+ ~PolicyCompiler();
+
+ // Compile registers any trap handlers needed by the policy and
+ // compiles the policy to a BPF program, which it returns.
+ // If |verify| is true, the resulting program is additionally verified.
+ scoped_ptr<CodeGen::Program> Compile(bool verify);
+
+ // DangerousSetEscapePC sets the "escape PC" that is allowed to issue any
+ // system calls, regardless of policy.
+ void DangerousSetEscapePC(uint64_t escapepc);
+
+ // Error returns an ErrorCode to indicate the system call should fail with
+ // the specified error number.
+ ErrorCode Error(int err);
+
+ // Trap returns an ErrorCode to indicate the system call should
+ // instead invoke a trap handler.
+ ErrorCode Trap(TrapRegistry::TrapFnc fnc, const void* aux, bool safe);
+
+ // UnsafeTraps require some syscalls to always be allowed.
+ // This helper function returns true for these calls.
+ static bool IsRequiredForUnsafeTrap(int sysno);
+
+ // We can also use ErrorCode to request evaluation of a conditional
+ // statement based on inspection of system call parameters.
+ // This method wraps an ErrorCode object around the conditional statement.
+ // Argument "argno" (0..5) will be bitwise-AND'd with "mask" and compared
+ // to "value"; if equal, then "passed" will be returned, otherwise "failed".
+ // If "is_32bit" is ErrorCode::TP_32BIT, the argument must be in the range
+ // 0x0..((1ull << 32) - 1); on 64-bit platforms a value whose upper 32 bits
+ // are all ones is also accepted when bit 31 is set.
+ // If it is outside this range, the sandbox treats the system call just
+ // the same as any other ABI violation (i.e. it aborts with an error
+ // message).
+ ErrorCode CondMaskedEqual(int argno,
+ ErrorCode::ArgType is_32bit,
+ uint64_t mask,
+ uint64_t value,
+ const ErrorCode& passed,
+ const ErrorCode& failed);
+
+ // Returns the fatal ErrorCode that is used to indicate that somebody
+ // attempted to pass a 64bit value in a 32bit system call argument.
+ // This method is primarily needed for testing purposes.
+ ErrorCode Unexpected64bitArgument();
+
+ private:
+ struct Range;
+ typedef std::vector<Range> Ranges;
+ typedef std::set<ErrorCode, struct ErrorCode::LessThan> Conds;
+
+ // Used by CondExpressionHalf to track which half of the argument it's
+ // emitting instructions for.
+ enum ArgHalf {
+ LowerHalf,
+ UpperHalf,
+ };
+
+ // Compile the configured policy into a complete instruction sequence.
+ CodeGen::Node AssemblePolicy();
+
+ // Return an instruction sequence that checks the
+ // arch_seccomp_data's "arch" field is valid, and then passes
+ // control to |passed| if so.
+ CodeGen::Node CheckArch(CodeGen::Node passed);
+
+ // If |has_unsafe_traps_| is true, returns an instruction sequence
+ // that allows all system calls from |escapepc_|, and otherwise
+ // passes control to |rest|. Otherwise, simply returns |rest|.
+ CodeGen::Node MaybeAddEscapeHatch(CodeGen::Node rest);
+
+ // Return an instruction sequence that loads and checks the system
+ // call number, performs a binary search, and then dispatches to an
+ // appropriate instruction sequence compiled from the current
+ // policy.
+ CodeGen::Node DispatchSyscall();
+
+ // Return an instruction sequence that checks the system call number
+ // (expected to be loaded in register A) and if valid, passes
+ // control to |passed| (with register A still valid).
+ CodeGen::Node CheckSyscallNumber(CodeGen::Node passed);
+
+ // Finds all the ranges of system calls that need to be handled. Ranges are
+ // sorted in ascending order of system call numbers. There are no gaps in the
+ // ranges. System calls with identical ErrorCodes are coalesced into a single
+ // range.
+ void FindRanges(Ranges* ranges);
+
+ // Returns a BPF program snippet that implements a jump table for the
+ // given range of system call numbers. This function runs recursively.
+ CodeGen::Node AssembleJumpTable(Ranges::const_iterator start,
+ Ranges::const_iterator stop);
+
+ // CompileResult compiles an individual result expression into a
+ // CodeGen node.
+ CodeGen::Node CompileResult(const ResultExpr& res);
+
+ // Returns a BPF program snippet that makes the BPF filter program exit
+ // with the given ErrorCode "err". N.B. the ErrorCode may very well be a
+ // conditional expression; if so, this function will recursively call
+ // CondExpression() and possibly RetExpression() to build a complex set of
+ // instructions.
+ CodeGen::Node RetExpression(const ErrorCode& err);
+
+ // Returns a BPF program that evaluates the conditional expression in
+ // "cond" and returns the appropriate value from the BPF filter program.
+ // This function recursively calls RetExpression(); it should only ever be
+ // called from RetExpression().
+ CodeGen::Node CondExpression(const ErrorCode& cond);
+
+ // Returns a BPF program that evaluates half of a conditional expression;
+ // it should only ever be called from CondExpression().
+ CodeGen::Node CondExpressionHalf(const ErrorCode& cond,
+ ArgHalf half,
+ CodeGen::Node passed,
+ CodeGen::Node failed);
+
+ const Policy* policy_;
+ TrapRegistry* registry_;
+ // Set via DangerousSetEscapePC(); 0 until then.
+ uint64_t escapepc_;
+
+ // Storage for the outcomes of conditional ErrorCodes created by
+ // CondMaskedEqual(), which hands out pointers to these elements.
+ Conds conds_;
+ CodeGen gen_;
+ // Whether the policy yields any result that uses unsafe traps; computed
+ // once in the constructor.
+ bool has_unsafe_traps_;
+
+ DISALLOW_COPY_AND_ASSIGN(PolicyCompiler);
+};
+
+} // namespace bpf_dsl
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_BPF_DSL_POLICY_COMPILER_H_
diff --git a/sandbox/linux/bpf_dsl/syscall_set.cc b/sandbox/linux/bpf_dsl/syscall_set.cc
new file mode 100644
index 0000000000..47810e99ac
--- /dev/null
+++ b/sandbox/linux/bpf_dsl/syscall_set.cc
@@ -0,0 +1,144 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/bpf_dsl/syscall_set.h"
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "sandbox/linux/bpf_dsl/linux_syscall_ranges.h"
+
+namespace sandbox {
+
+namespace {
+
+#if defined(__mips__) && (_MIPS_SIM == _MIPS_SIM_ABI32)
+// This is true for the Mips O32 ABI.
+static_assert(MIN_SYSCALL == __NR_Linux, "min syscall number should be 4000");
+#else
+// This is true for the other supported architectures (Intel and ARM EABI).
+static_assert(MIN_SYSCALL == 0u,
+ "min syscall should always be zero");
+#endif
+
+// SyscallRange represents an inclusive range of system call numbers.
+// Initialised positionally ({first, last}), so keep the field order.
+struct SyscallRange {
+ uint32_t first; // Lowest system call number in the range (inclusive).
+ uint32_t last; // Highest system call number in the range (inclusive).
+};
+
+// The ranges of system call numbers that SyscallSet::IsValid() accepts.
+// NOTE(review): NextSyscall() appears to rely on these ranges being listed
+// in ascending, non-overlapping order; confirm before reordering.
+const SyscallRange kValidSyscallRanges[] = {
+ // First we iterate up to MAX_PUBLIC_SYSCALL, which is equal to MAX_SYSCALL
+ // on Intel architectures, but leaves room for private syscalls on ARM.
+ {MIN_SYSCALL, MAX_PUBLIC_SYSCALL},
+#if defined(__arm__)
+ // ARM EABI includes "ARM private" system calls starting at
+ // MIN_PRIVATE_SYSCALL, and a "ghost syscall private to the kernel" at
+ // MIN_GHOST_SYSCALL.
+ {MIN_PRIVATE_SYSCALL, MAX_PRIVATE_SYSCALL},
+ {MIN_GHOST_SYSCALL, MAX_SYSCALL},
+#endif
+};
+
+} // namespace
+
+// Returns an iterator positioned at the first element of this set.
+SyscallSet::Iterator SyscallSet::begin() const {
+  const bool done = false;
+  return Iterator(set_, done);
+}
+
+// Returns the past-the-end iterator, i.e. one constructed as already done.
+SyscallSet::Iterator SyscallSet::end() const {
+  const bool done = true;
+  return Iterator(set_, done);
+}
+
+// Returns true if |num| lies within one of the inclusive ranges in
+// kValidSyscallRanges.
+bool SyscallSet::IsValid(uint32_t num) {
+  for (const SyscallRange& range : kValidSyscallRanges) {
+    const bool outside = num < range.first || num > range.last;
+    if (outside) {
+      continue;
+    }
+    return true;
+  }
+  return false;
+}
+
+// Two SyscallSets are equal when they select the same kind of set.
+bool operator==(const SyscallSet& lhs, const SyscallSet& rhs) {
+  const bool same_set = lhs.set_ == rhs.set_;
+  return same_set;
+}
+
+// Starts at number 0, or in the finished state if |done| is true.
+SyscallSet::Iterator::Iterator(Set set, bool done)
+    : set_(set), done_(done), num_(0) {
+  if (done) {
+    return;
+  }
+  // Exactly one of VALID_ONLY / INVALID_ONLY leaves out the number 0,
+  // depending on whether 0 is a valid system call number. If that is the set
+  // being iterated, skip ahead to its first real element.
+  const Set set_without_zero =
+      IsValid(num_) ? Set::INVALID_ONLY : Set::VALID_ONLY;
+  if (set_ == set_without_zero) {
+    ++*this;
+  }
+}
+
+// Returns the system call number the iterator currently points at.
+// Must not be called on a finished iterator (checked in debug builds).
+uint32_t SyscallSet::Iterator::operator*() const {
+ DCHECK(!done_);
+ return num_;
+}
+
+// Advances to the next number in the set, or marks the iterator as done
+// when there is none.
+SyscallSet::Iterator& SyscallSet::Iterator::operator++() {
+  DCHECK(!done_);
+
+  const uint32_t next = NextSyscall();
+  num_ = next;
+  // NextSyscall() reports "no further element" as 0.
+  if (next == 0) {
+    done_ = true;
+  }
+  return *this;
+}
+
+// NextSyscall returns the next system call in the iterated system
+// call set after |num_|, or 0 if no such system call exists.
+uint32_t SyscallSet::Iterator::NextSyscall() const {
+ // ALL wants both kinds of numbers; the other two sets want only one kind.
+ const bool want_valid = (set_ != Set::INVALID_ONLY);
+ const bool want_invalid = (set_ != Set::VALID_ONLY);
+
+ // The order of the checks below matters: the first one that applies wins.
+ for (const SyscallRange& range : kValidSyscallRanges) {
+ if (want_invalid && range.first > 0 && num_ < range.first - 1) {
+ // Even when iterating invalid syscalls, we only include the end points;
+ // so skip directly to just before the next (valid) range.
+ return range.first - 1;
+ }
+ // Below this range: jump to its first number.
+ if (want_valid && num_ < range.first) {
+ return range.first;
+ }
+ // Inside this range (and not at its end): step by one.
+ if (want_valid && num_ < range.last) {
+ return num_ + 1;
+ }
+ // At or before the end of this range: step to the first number after it.
+ if (want_invalid && num_ <= range.last) {
+ return range.last + 1;
+ }
+ }
+
+ if (want_invalid) {
+ // BPF programs only ever operate on unsigned quantities. So,
+ // that's how we iterate; we return values from
+ // 0..0xFFFFFFFFu. But there are places, where the kernel might
+ // interpret system call numbers as signed quantities, so the
+ // boundaries between signed and unsigned values are potential
+ // problem cases. We want to explicitly return these values from
+ // our iterator.
+ if (num_ < 0x7FFFFFFFu)
+ return 0x7FFFFFFFu;
+ if (num_ < 0x80000000u)
+ return 0x80000000u;
+
+ if (num_ < 0xFFFFFFFFu)
+ return 0xFFFFFFFFu;
+ }
+
+ // Nothing left to iterate.
+ return 0;
+}
+
+// Iterators compare equal when they are in the same state: both done or
+// both not done, and at the same number. Comparing iterators over different
+// sets is a programming error (checked in debug builds).
+bool operator==(const SyscallSet::Iterator& lhs,
+                const SyscallSet::Iterator& rhs) {
+  DCHECK(lhs.set_ == rhs.set_);
+  if (lhs.done_ != rhs.done_) {
+    return false;
+  }
+  return lhs.num_ == rhs.num_;
+}
+
+// Negation of operator== for iterators.
+bool operator!=(const SyscallSet::Iterator& lhs,
+ const SyscallSet::Iterator& rhs) {
+ return !(lhs == rhs);
+}
+
+} // namespace sandbox
diff --git a/sandbox/linux/bpf_dsl/syscall_set.h b/sandbox/linux/bpf_dsl/syscall_set.h
new file mode 100644
index 0000000000..b9f076d932
--- /dev/null
+++ b/sandbox/linux/bpf_dsl/syscall_set.h
@@ -0,0 +1,103 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_BPF_DSL_SYSCALL_SET_H__
+#define SANDBOX_LINUX_BPF_DSL_SYSCALL_SET_H__
+
+#include <stdint.h>
+
+#include <iterator>
+
+#include "base/macros.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+
+ // Iterates over the entire system call range from 0..0xFFFFFFFFu. This
+ // iterator is aware of what system calls look like and will skip quickly
+ // over ranges that can't contain system calls. It iterates more slowly
+ // whenever it reaches a range that is potentially problematic, returning
+ // the last invalid value before a valid range of system calls, and the
+ // first invalid value after a valid range of syscalls. It iterates over
+ // individual values whenever it is in the normal range for system calls
+ // (typically MIN_SYSCALL..MAX_SYSCALL).
+ //
+ // A SyscallSet is only obtainable through the static factories All(),
+ // ValidOnly() and InvalidOnly(); it can be copied but not assigned.
+ //
+ // Example usage:
+ // for (uint32_t sysnum : SyscallSet::All()) {
+ // // Do something with sysnum.
+ // }
+ class SANDBOX_EXPORT SyscallSet {
+ public:
+ class Iterator;
+
+ SyscallSet(const SyscallSet& ss) : set_(ss.set_) {}
+ ~SyscallSet() {}
+
+ // begin()/end() make the class usable in range-based for loops.
+ Iterator begin() const;
+ Iterator end() const;
+
+ // All returns a SyscallSet that contains both valid and invalid
+ // system call numbers.
+ static SyscallSet All() { return SyscallSet(Set::ALL); }
+
+ // ValidOnly returns a SyscallSet that contains only valid system
+ // call numbers.
+ static SyscallSet ValidOnly() { return SyscallSet(Set::VALID_ONLY); }
+
+ // InvalidOnly returns a SyscallSet that contains only invalid
+ // system call numbers, but still omits numbers in the middle of a
+ // range of invalid system call numbers.
+ static SyscallSet InvalidOnly() { return SyscallSet(Set::INVALID_ONLY); }
+
+ // IsValid returns whether |num| specifies a valid system call
+ // number.
+ static bool IsValid(uint32_t num);
+
+ private:
+ // Selects which categories of system call numbers the set contains.
+ enum class Set { ALL, VALID_ONLY, INVALID_ONLY };
+
+ // Private: callers go through the static factory functions instead.
+ explicit SyscallSet(Set set) : set_(set) {}
+
+ // The category this set iterates over.
+ Set set_;
+
+ friend bool operator==(const SyscallSet&, const SyscallSet&);
+ DISALLOW_ASSIGN(SyscallSet);
+ };
+
+SANDBOX_EXPORT bool operator==(const SyscallSet& lhs, const SyscallSet& rhs);
+
+ // Iterator provides C++ input iterator semantics for traversing a
+ // SyscallSet. Instances are created by SyscallSet (a friend); they can be
+ // copied but not assigned.
+ class SyscallSet::Iterator
+ : public std::iterator<std::input_iterator_tag, uint32_t> {
+ public:
+ Iterator(const Iterator& it)
+ : set_(it.set_), done_(it.done_), num_(it.num_) {}
+ ~Iterator() {}
+
+ // Dereferencing yields a system call number; pre-increment advances to
+ // the next number in the set.
+ uint32_t operator*() const;
+ Iterator& operator++();
+
+ private:
+ // |set| selects the category to iterate; |done| is the initial value of
+ // the "finished" flag.
+ Iterator(Set set, bool done);
+
+ // Computes the number that follows |num_|, or 0 when there is none.
+ uint32_t NextSyscall() const;
+
+ // Category of system call numbers being iterated.
+ Set set_;
+ // Set once iteration has run past the last value.
+ bool done_;
+ // The current system call number.
+ uint32_t num_;
+
+ friend SyscallSet;
+ friend bool operator==(const Iterator&, const Iterator&);
+ DISALLOW_ASSIGN(Iterator);
+ };
+
+SANDBOX_EXPORT bool operator==(const SyscallSet::Iterator& lhs,
+ const SyscallSet::Iterator& rhs);
+SANDBOX_EXPORT bool operator!=(const SyscallSet::Iterator& lhs,
+ const SyscallSet::Iterator& rhs);
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_BPF_DSL_SYSCALL_SET_H__
diff --git a/sandbox/linux/bpf_dsl/syscall_set_unittest.cc b/sandbox/linux/bpf_dsl/syscall_set_unittest.cc
new file mode 100644
index 0000000000..fafb6f6f73
--- /dev/null
+++ b/sandbox/linux/bpf_dsl/syscall_set_unittest.cc
@@ -0,0 +1,124 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/bpf_dsl/syscall_set.h"
+
+#include <stdint.h>
+
+#include "sandbox/linux/bpf_dsl/linux_syscall_ranges.h"
+#include "sandbox/linux/tests/unit_tests.h"
+
+namespace sandbox {
+
+namespace {
+
+ // The sets that include invalid system call numbers. Tests that check
+ // properties of the invalid values loop over this table.
+ const SyscallSet kSyscallSets[] = {
+ SyscallSet::All(),
+ SyscallSet::InvalidOnly(),
+ };
+
+ // Checks that every set in kSyscallSets yields strictly increasing values,
+ // that SyscallSet::All() begins at 0, and that iteration is non-empty and
+ // ends with 0xFFFFFFFFu.
+ SANDBOX_TEST(SyscallSet, Monotonous) {
+ for (const SyscallSet& set : kSyscallSets) {
+ // |prev| is only meaningful once |have_prev| is true.
+ uint32_t prev = 0;
+ bool have_prev = false;
+ for (uint32_t sysnum : set) {
+ if (have_prev) {
+ SANDBOX_ASSERT(sysnum > prev);
+ } else if (set == SyscallSet::All()) {
+ // The iterator should start at 0.
+ SANDBOX_ASSERT(sysnum == 0);
+ }
+
+ prev = sysnum;
+ have_prev = true;
+ }
+
+ // The iterator should always return 0xFFFFFFFFu as the last value.
+ SANDBOX_ASSERT(have_prev);
+ SANDBOX_ASSERT(prev == 0xFFFFFFFFu);
+ }
+ }
+
+ // AssertRange checks that SyscallSet::All() produces all system call
+ // numbers in the inclusive range [min, max], consecutively and in order.
+ // Requires min < max.
+ void AssertRange(uint32_t min, uint32_t max) {
+ SANDBOX_ASSERT(min < max);
+ // Unsigned arithmetic: when |min| is 0 this wraps around, and the matching
+ // |sysnum - 1| below wraps the same way, so the first comparison still
+ // holds.
+ uint32_t prev = min - 1;
+ for (uint32_t sysnum : SyscallSet::All()) {
+ if (sysnum >= min && sysnum <= max) {
+ // Each value inside the range must directly follow the previous one.
+ SANDBOX_ASSERT(prev == sysnum - 1);
+ prev = sysnum;
+ }
+ }
+ // The last in-range value seen must be the upper bound itself.
+ SANDBOX_ASSERT(prev == max);
+ }
+
+ // Checks that every number of each valid system call range is visited.
+ // ARM builds additionally check the private and ghost ranges.
+ SANDBOX_TEST(SyscallSet, ValidSyscallRanges) {
+ AssertRange(MIN_SYSCALL, MAX_PUBLIC_SYSCALL);
+ #if defined(__arm__)
+ AssertRange(MIN_PRIVATE_SYSCALL, MAX_PRIVATE_SYSCALL);
+ AssertRange(MIN_GHOST_SYSCALL, MAX_SYSCALL);
+ #endif
+ }
+
+ // Checks that the invalid numbers produced by each set in kSyscallSets are
+ // exactly the expected boundary values, in order.
+ SANDBOX_TEST(SyscallSet, InvalidSyscalls) {
+ // Expected invalid values in iteration order; the list is
+ // architecture-dependent.
+ static const uint32_t kExpected[] = {
+ #if defined(__mips__)
+ 0,
+ MIN_SYSCALL - 1,
+ #endif
+ MAX_PUBLIC_SYSCALL + 1,
+ #if defined(__arm__)
+ MIN_PRIVATE_SYSCALL - 1,
+ MAX_PRIVATE_SYSCALL + 1,
+ MIN_GHOST_SYSCALL - 1,
+ MAX_SYSCALL + 1,
+ #endif
+ 0x7FFFFFFFu,
+ 0x80000000u,
+ 0xFFFFFFFFu,
+ };
+
+ for (const SyscallSet& set : kSyscallSets) {
+ // Index of the next expected invalid value.
+ size_t i = 0;
+ for (uint32_t sysnum : set) {
+ // Valid numbers (present in SyscallSet::All()) are ignored here.
+ if (!SyscallSet::IsValid(sysnum)) {
+ SANDBOX_ASSERT(i < arraysize(kExpected));
+ SANDBOX_ASSERT(kExpected[i] == sysnum);
+ ++i;
+ }
+ }
+ // Every expected value must have been seen.
+ SANDBOX_ASSERT(i == arraysize(kExpected));
+ }
+ }
+
+ // Every number yielded by SyscallSet::ValidOnly() must pass IsValid().
+ SANDBOX_TEST(SyscallSet, ValidOnlyIsOnlyValid) {
+ for (uint32_t sysnum : SyscallSet::ValidOnly()) {
+ SANDBOX_ASSERT(SyscallSet::IsValid(sysnum));
+ }
+ }
+
+ // Every number yielded by SyscallSet::InvalidOnly() must fail IsValid().
+ SANDBOX_TEST(SyscallSet, InvalidOnlyIsOnlyInvalid) {
+ for (uint32_t sysnum : SyscallSet::InvalidOnly()) {
+ SANDBOX_ASSERT(!SyscallSet::IsValid(sysnum));
+ }
+ }
+
+ // Merging the ValidOnly() and InvalidOnly() sequences must reproduce the
+ // All() sequence exactly.
+ SANDBOX_TEST(SyscallSet, AllIsValidOnlyPlusInvalidOnly) {
+ std::vector<uint32_t> merged;
+ // Named locals so that begin() and end() are taken from the same object.
+ const SyscallSet valid_only = SyscallSet::ValidOnly();
+ const SyscallSet invalid_only = SyscallSet::InvalidOnly();
+ // std::merge requires both inputs to be sorted.
+ std::merge(valid_only.begin(),
+ valid_only.end(),
+ invalid_only.begin(),
+ invalid_only.end(),
+ std::back_inserter(merged));
+
+ const SyscallSet all = SyscallSet::All();
+ SANDBOX_ASSERT(merged == std::vector<uint32_t>(all.begin(), all.end()));
+ }
+
+} // namespace
+
+} // namespace sandbox
diff --git a/sandbox/linux/bpf_dsl/verifier.cc b/sandbox/linux/bpf_dsl/verifier.cc
new file mode 100644
index 0000000000..417c663e30
--- /dev/null
+++ b/sandbox/linux/bpf_dsl/verifier.cc
@@ -0,0 +1,396 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/bpf_dsl/verifier.h"
+
+#include <string.h>
+
+#include <limits>
+
+#include "sandbox/linux/bpf_dsl/bpf_dsl.h"
+#include "sandbox/linux/bpf_dsl/bpf_dsl_impl.h"
+#include "sandbox/linux/bpf_dsl/policy.h"
+#include "sandbox/linux/bpf_dsl/policy_compiler.h"
+#include "sandbox/linux/bpf_dsl/seccomp_macros.h"
+#include "sandbox/linux/bpf_dsl/syscall_set.h"
+#include "sandbox/linux/seccomp-bpf/errorcode.h"
+#include "sandbox/linux/system_headers/linux_filter.h"
+#include "sandbox/linux/system_headers/linux_seccomp.h"
+
+namespace sandbox {
+namespace bpf_dsl {
+
+namespace {
+
+ // Mask covering the low 32 bits of a 64-bit value (0x00000000FFFFFFFF).
+ const uint64_t kLower32Bits = std::numeric_limits<uint32_t>::max();
+ // Mask covering the high 32 bits of a 64-bit value (0xFFFFFFFF00000000).
+ const uint64_t kUpper32Bits = static_cast<uint64_t>(kLower32Bits) << 32;
+
+ // Execution state of the BPF interpreter used by Verifier::EvaluateBPF.
+ // It only holds references to the program and the input data, so both must
+ // outlive the State object.
+ struct State {
+ State(const std::vector<struct sock_filter>& p,
+ const struct arch_seccomp_data& d)
+ : program(p), data(d), ip(0), accumulator(0), acc_is_valid(false) {}
+ // The program being interpreted.
+ const std::vector<struct sock_filter>& program;
+ // The system call data the program is evaluated against.
+ const struct arch_seccomp_data& data;
+ // Index into |program| of the instruction being executed.
+ unsigned int ip;
+ // The BPF accumulator register.
+ uint32_t accumulator;
+ // False until a load instruction has stored a value in |accumulator|.
+ bool acc_is_valid;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(State);
+ };
+
+ // Evaluates the policy decision tree rooted at |code| against the system
+ // call arguments in |data| and returns the resulting seccomp return value.
+ //
+ // Leaf nodes (ET_SIMPLE / ET_TRAP) yield their own value. Conditional nodes
+ // (ET_COND) compare the masked argument with the node's value and continue
+ // with the "passed" or "failed" child. A 32-bit conditional whose argument
+ // has upper bits set that are not a sign extension of a negative 32-bit
+ // value yields the compiler's Unexpected64bitArgument() result. Any other
+ // node type yields SECCOMP_RET_INVALID.
+ uint32_t EvaluateErrorCode(bpf_dsl::PolicyCompiler* compiler,
+                            const ErrorCode& code,
+                            const struct arch_seccomp_data& data) {
+   // Walk down the tree iteratively; each conditional picks exactly one child.
+   const ErrorCode* node = &code;
+   for (;;) {
+     const bool is_leaf = node->error_type() == ErrorCode::ET_SIMPLE ||
+                          node->error_type() == ErrorCode::ET_TRAP;
+     if (is_leaf) {
+       return node->err();
+     }
+     if (node->error_type() != ErrorCode::ET_COND) {
+       return SECCOMP_RET_INVALID;
+     }
+
+     const auto arg = data.args[node->argno()];
+     const bool has_upper_bits = (arg >> 32) != 0;
+     const bool is_sign_extended =
+         (arg & 0xFFFFFFFF80000000ull) == 0xFFFFFFFF80000000ull;
+     if (node->width() == ErrorCode::TP_32BIT && has_upper_bits &&
+         !is_sign_extended) {
+       return compiler->Unexpected64bitArgument().err();
+     }
+
+     const bool equal = (arg & node->mask()) == node->value();
+     node = equal ? node->passed() : node->failed();
+   }
+ }
+
+ // Recursively checks that |program| agrees with the policy decision tree.
+ //
+ // |root_code| is the root of the tree for the system call under test and
+ // |code| is the node currently being probed. For conditional nodes the
+ // function writes probe values into |data->args| and recurses into the
+ // child the probe is expected to select. At leaf nodes it runs the BPF
+ // program on |*data| and compares the result with what evaluating
+ // |root_code| yields for the same data.
+ //
+ // Returns true on success. On failure returns false with |*err| pointing
+ // at a static message.
+ bool VerifyErrorCode(bpf_dsl::PolicyCompiler* compiler,
+ const std::vector<struct sock_filter>& program,
+ struct arch_seccomp_data* data,
+ const ErrorCode& root_code,
+ const ErrorCode& code,
+ const char** err) {
+ if (code.error_type() == ErrorCode::ET_SIMPLE ||
+ code.error_type() == ErrorCode::ET_TRAP) {
+ const uint32_t computed_ret = Verifier::EvaluateBPF(program, *data, err);
+ if (*err) {
+ return false;
+ }
+ const uint32_t policy_ret = EvaluateErrorCode(compiler, root_code, *data);
+ if (computed_ret != policy_ret) {
+ // For efficiency's sake, we'd much rather compare "computed_ret"
+ // against "code.err()". This works most of the time, but it doesn't
+ // always work for nested conditional expressions. The test values
+ // that we generate on the fly to probe expressions can trigger
+ // code flow decisions in multiple nodes of the decision tree, and the
+ // only way to compute the correct error code in that situation is by
+ // calling EvaluateErrorCode().
+ *err = "Exit code from BPF program doesn't match";
+ return false;
+ }
+ } else if (code.error_type() == ErrorCode::ET_COND) {
+ // Only argument indices 0..5 are accepted.
+ if (code.argno() < 0 || code.argno() >= 6) {
+ *err = "Invalid argument number in error code";
+ return false;
+ }
+
+ // TODO(mdempsky): The test values generated here try to provide good
+ // coverage for generated BPF instructions while avoiding combinatorial
+ // explosion on large policies. Ideally we would instead take a fuzzing-like
+ // approach and generate a bounded number of test cases regardless of policy
+ // size.
+
+ // Verify that we can check a value for simple equality.
+ data->args[code.argno()] = code.value();
+ if (!VerifyErrorCode(compiler, program, data, root_code, *code.passed(),
+ err)) {
+ return false;
+ }
+
+ // If mask ignores any bits, verify that setting those bits is still
+ // detected as equality.
+ uint64_t ignored_bits = ~code.mask();
+ if (code.width() == ErrorCode::TP_32BIT) {
+ // For 32-bit arguments only the low half may carry ignored bits.
+ ignored_bits = static_cast<uint32_t>(ignored_bits);
+ }
+ if ((ignored_bits & kLower32Bits) != 0) {
+ data->args[code.argno()] = code.value() | (ignored_bits & kLower32Bits);
+ if (!VerifyErrorCode(compiler, program, data, root_code, *code.passed(),
+ err)) {
+ return false;
+ }
+ }
+ if ((ignored_bits & kUpper32Bits) != 0) {
+ data->args[code.argno()] = code.value() | (ignored_bits & kUpper32Bits);
+ if (!VerifyErrorCode(compiler, program, data, root_code, *code.passed(),
+ err)) {
+ return false;
+ }
+ }
+
+ // Verify that changing bits included in the mask is detected as inequality.
+ if ((code.mask() & kLower32Bits) != 0) {
+ data->args[code.argno()] = code.value() ^ (code.mask() & kLower32Bits);
+ if (!VerifyErrorCode(compiler, program, data, root_code, *code.failed(),
+ err)) {
+ return false;
+ }
+ }
+ if ((code.mask() & kUpper32Bits) != 0) {
+ data->args[code.argno()] = code.value() ^ (code.mask() & kUpper32Bits);
+ if (!VerifyErrorCode(compiler, program, data, root_code, *code.failed(),
+ err)) {
+ return false;
+ }
+ }
+
+ if (code.width() == ErrorCode::TP_32BIT) {
+ // For 32-bit system call arguments, we emit additional instructions to
+ // validate the upper 32-bits. Here we test that validation.
+
+ // Arbitrary 64-bit values should be rejected.
+ data->args[code.argno()] = 1ULL << 32;
+ if (!VerifyErrorCode(compiler, program, data, root_code,
+ compiler->Unexpected64bitArgument(), err)) {
+ return false;
+ }
+
+ // Upper 32-bits set without the MSB of the lower 32-bits set should be
+ // rejected too.
+ data->args[code.argno()] = kUpper32Bits;
+ if (!VerifyErrorCode(compiler, program, data, root_code,
+ compiler->Unexpected64bitArgument(), err)) {
+ return false;
+ }
+ }
+ } else {
+ // Neither a leaf nor a conditional node.
+ *err = "Attempting to return invalid error code from BPF program";
+ return false;
+ }
+ return true;
+ }
+
+ // Emulates a BPF_LD instruction: copies one 32-bit word from offset
+ // |insn.k| of the system call data into the accumulator and marks the
+ // accumulator as valid.
+ //
+ // Only absolute word loads (BPF_W | BPF_ABS) with zero jump fields are
+ // accepted, and the offset must be 4-byte aligned and lie inside
+ // struct arch_seccomp_data. Otherwise |*err| is set and the state is left
+ // untouched.
+ void Ld(State* state, const struct sock_filter& insn, const char** err) {
+   const bool well_formed = BPF_SIZE(insn.code) == BPF_W &&
+                            BPF_MODE(insn.code) == BPF_ABS &&
+                            insn.jt == 0 && insn.jf == 0;
+   if (!well_formed) {
+     *err = "Invalid BPF_LD instruction";
+     return;
+   }
+
+   // We only allow loading of properly aligned 32bit quantities.
+   const bool in_bounds = insn.k < sizeof(struct arch_seccomp_data);
+   const bool aligned = (insn.k & 3) == 0;
+   if (!(in_bounds && aligned)) {
+     *err = "Invalid operand in BPF_LD instruction";
+     return;
+   }
+
+   const char* const base = reinterpret_cast<const char*>(&state->data);
+   memcpy(&state->accumulator, base + insn.k, 4);
+   state->acc_is_valid = true;
+ }
+
+ // Emulates a BPF_JMP instruction by adding the selected jump offset to
+ // |state->ip|. The interpreter loop in the caller adds one more after each
+ // instruction, which is why the range checks below use "+ 1".
+ //
+ // BPF_JA jumps unconditionally by |insn.k|. The conditional forms (JEQ,
+ // JGT, JGE, JSET) compare the accumulator with the immediate |insn.k| and
+ // jump by |insn.jt| or |insn.jf|. They require an immediate operand, a
+ // valid accumulator, and both targets inside the program.
+ //
+ // On any violation |*err| is set and |state->ip| is left unchanged.
+ void Jmp(State* state, const struct sock_filter& insn, const char** err) {
+   const char* const kInvalidJmp = "Invalid BPF_JMP instruction";
+   const auto program_size = state->program.size();
+
+   if (BPF_OP(insn.code) == BPF_JA) {
+     // Reject targets past the end of the program and offsets that wrap
+     // around (or do not move forward).
+     const auto next_ip = state->ip + insn.k + 1;
+     if (next_ip >= program_size || next_ip <= state->ip) {
+       *err = kInvalidJmp;
+       return;
+     }
+     state->ip += insn.k;
+     return;
+   }
+
+   const bool targets_in_range = state->ip + insn.jt + 1 < program_size &&
+                                 state->ip + insn.jf + 1 < program_size;
+   if (BPF_SRC(insn.code) != BPF_K || !state->acc_is_valid ||
+       !targets_in_range) {
+     *err = kInvalidJmp;
+     return;
+   }
+
+   bool condition_holds = false;
+   switch (BPF_OP(insn.code)) {
+     case BPF_JEQ:
+       condition_holds = (state->accumulator == insn.k);
+       break;
+     case BPF_JGT:
+       condition_holds = (state->accumulator > insn.k);
+       break;
+     case BPF_JGE:
+       condition_holds = (state->accumulator >= insn.k);
+       break;
+     case BPF_JSET:
+       condition_holds = ((state->accumulator & insn.k) != 0);
+       break;
+     default:
+       *err = kInvalidJmp;
+       return;
+   }
+   state->ip += condition_holds ? insn.jt : insn.jf;
+ }
+
+ // Emulates a BPF_RET instruction. Only returns of an immediate (BPF_K) are
+ // accepted, in which case the immediate |insn.k| is the result. Otherwise
+ // |*err| is set and 0 is returned. The interpreter state is not used.
+ uint32_t Ret(State*, const struct sock_filter& insn, const char** err) {
+   if (BPF_SRC(insn.code) == BPF_K) {
+     return insn.k;
+   }
+   *err = "Invalid BPF_RET instruction";
+   return 0;
+ }
+
+ // Emulates a BPF_ALU instruction on the accumulator.
+ //
+ // BPF_NEG negates the accumulator. Every other operation takes its operand
+ // from the instruction's immediate |insn.k| and must therefore use BPF_K as
+ // its source. Division or modulo by zero, shifts by 32 bits or more, and
+ // unknown operators are rejected: |*err| is set to a static message and the
+ // accumulator is left unchanged.
+ void Alu(State* state, const struct sock_filter& insn, const char** err) {
+   if (BPF_OP(insn.code) == BPF_NEG) {
+     state->accumulator = -state->accumulator;
+     return;
+   }
+   if (BPF_SRC(insn.code) != BPF_K) {
+     *err = "Unexpected source operand in arithmetic operation";
+     return;
+   }
+   switch (BPF_OP(insn.code)) {
+     case BPF_ADD:
+       state->accumulator += insn.k;
+       break;
+     case BPF_SUB:
+       state->accumulator -= insn.k;
+       break;
+     case BPF_MUL:
+       state->accumulator *= insn.k;
+       break;
+     case BPF_DIV:
+       if (!insn.k) {
+         *err = "Illegal division by zero";
+         break;
+       }
+       state->accumulator /= insn.k;
+       break;
+     case BPF_MOD:
+       if (!insn.k) {
+         *err = "Illegal division by zero";
+         break;
+       }
+       state->accumulator %= insn.k;
+       break;
+     case BPF_OR:
+       state->accumulator |= insn.k;
+       break;
+     case BPF_XOR:
+       state->accumulator ^= insn.k;
+       break;
+     case BPF_AND:
+       state->accumulator &= insn.k;
+       break;
+     case BPF_LSH:
+       // The accumulator is 32 bits wide, so a shift count of 32 or more is
+       // undefined behaviour in C++ and must be rejected (note: >=, not >).
+       if (insn.k >= 32) {
+         *err = "Illegal shift operation";
+         break;
+       }
+       state->accumulator <<= insn.k;
+       break;
+     case BPF_RSH:
+       // Same width restriction as for BPF_LSH.
+       if (insn.k >= 32) {
+         *err = "Illegal shift operation";
+         break;
+       }
+       state->accumulator >>= insn.k;
+       break;
+     default:
+       *err = "Invalid operator in arithmetic operation";
+       break;
+   }
+ }
+
+} // namespace
+
+ // Checks |program| against |policy| for every system call number produced
+ // by SyscallSet::All(). For each number the policy result is compiled into
+ // an ErrorCode tree, and VerifyErrorCode() then checks that the program
+ // agrees with that tree.
+ //
+ // Returns true with |*err| set to NULL on success. Returns false with
+ // |*err| describing the first mismatch otherwise.
+ bool Verifier::VerifyBPF(bpf_dsl::PolicyCompiler* compiler,
+ const std::vector<struct sock_filter>& program,
+ const bpf_dsl::Policy& policy,
+ const char** err) {
+ *err = NULL;
+ for (uint32_t sysnum : SyscallSet::All()) {
+ // We ideally want to iterate over the full system call range and values
+ // just above and just below this range. This gives us the full result set
+ // of the "evaluators".
+ // On Intel systems, this can fail in a surprising way, as a cleared bit 30
+ // indicates either i386 or x86-64; and a set bit 30 indicates x32. And
+ // unless we pay attention to setting this bit correctly, an early check in
+ // our BPF program will make us fail with a misleading error code.
+ // Only the first two members are initialized explicitly (presumably the
+ // system call number and the architecture -- the struct is declared
+ // elsewhere); the remaining members are zero-initialized.
+ struct arch_seccomp_data data = {static_cast<int>(sysnum),
+ static_cast<uint32_t>(SECCOMP_ARCH)};
+ #if defined(__i386__) || defined(__x86_64__)
+ #if defined(__x86_64__) && defined(__ILP32__)
+ // x32 build: skip numbers that do not have bit 30 set.
+ if (!(sysnum & 0x40000000u)) {
+ continue;
+ }
+ #else
+ // i386 / x86-64 build: skip numbers that have bit 30 set.
+ if (sysnum & 0x40000000u) {
+ continue;
+ }
+ #endif
+ #endif
+ // Valid numbers are evaluated by the policy; all others share the
+ // policy's InvalidSyscall() result.
+ ErrorCode code = SyscallSet::IsValid(sysnum)
+ ? policy.EvaluateSyscall(sysnum)->Compile(compiler)
+ : policy.InvalidSyscall()->Compile(compiler);
+ // The same tree is passed as both root and current node.
+ if (!VerifyErrorCode(compiler, program, &data, code, code, err)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // Interprets |program| against |data| and returns the value of the BPF_RET
+ // instruction it reaches.
+ //
+ // Only the instruction classes handled below (BPF_LD, BPF_JMP, BPF_RET and
+ // BPF_ALU) are supported. On success |*err| is NULL. On any failure (bad
+ // program length, malformed instruction, running off the end of the
+ // program, or an unexpected return action) |*err| points at a static
+ // message and 0 is returned.
+ uint32_t Verifier::EvaluateBPF(const std::vector<struct sock_filter>& program,
+                                const struct arch_seccomp_data& data,
+                                const char** err) {
+   *err = NULL;
+   if (program.size() < 1 || program.size() >= SECCOMP_MAX_PROGRAM_SIZE) {
+     *err = "Invalid program length";
+     return 0;
+   }
+   // The loop stops as soon as an instruction handler reports an error.
+   for (State state(program, data); !*err; ++state.ip) {
+     if (state.ip >= program.size()) {
+       *err = "Invalid instruction pointer in BPF program";
+       break;
+     }
+     const struct sock_filter& insn = program[state.ip];
+     switch (BPF_CLASS(insn.code)) {
+       case BPF_LD:
+         Ld(&state, insn, err);
+         break;
+       case BPF_JMP:
+         Jmp(&state, insn, err);
+         break;
+       case BPF_RET: {
+         uint32_t r = Ret(&state, insn, err);
+         if (*err) {
+           // Ret() rejected the instruction. Return now so that its specific
+           // message is not overwritten by the generic one below (the 0 it
+           // returns would otherwise be treated as SECCOMP_RET_KILL).
+           return 0;
+         }
+         switch (r & SECCOMP_RET_ACTION) {
+           case SECCOMP_RET_TRAP:
+           case SECCOMP_RET_ERRNO:
+           case SECCOMP_RET_TRACE:
+           case SECCOMP_RET_ALLOW:
+             break;
+           case SECCOMP_RET_KILL:     // We don't ever generate this
+           case SECCOMP_RET_INVALID:  // Should never show up in BPF program
+           default:
+             *err = "Unexpected return code found in BPF program";
+             return 0;
+         }
+         return r;
+       }
+       case BPF_ALU:
+         Alu(&state, insn, err);
+         break;
+       default:
+         *err = "Unexpected instruction in BPF program";
+         break;
+     }
+   }
+   return 0;
+ }
+
+} // namespace bpf_dsl
+} // namespace sandbox
diff --git a/sandbox/linux/bpf_dsl/verifier.h b/sandbox/linux/bpf_dsl/verifier.h
new file mode 100644
index 0000000000..b0435d1aa1
--- /dev/null
+++ b/sandbox/linux/bpf_dsl/verifier.h
@@ -0,0 +1,57 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_BPF_DSL_VERIFIER_H__
+#define SANDBOX_LINUX_BPF_DSL_VERIFIER_H__
+
+#include <stdint.h>
+
+#include <vector>
+
+#include "base/macros.h"
+#include "sandbox/sandbox_export.h"
+
+struct sock_filter;
+
+namespace sandbox {
+struct arch_seccomp_data;
+
+namespace bpf_dsl {
+class Policy;
+class PolicyCompiler;
+
+ // Static-only helper that checks a compiled BPF program against the policy
+ // it was generated from. The class cannot be instantiated.
+ class SANDBOX_EXPORT Verifier {
+ public:
+ // Evaluate the BPF program for all possible inputs and verify that it
+ // computes the correct result. We use the "evaluators" to determine
+ // the full set of possible inputs that we have to iterate over.
+ // Returns success, if the BPF filter accurately reflects the rules
+ // set by the "evaluators".
+ // Upon success, "err" is set to NULL. Upon failure, it contains a static
+ // error message that does not need to be free()'d.
+ static bool VerifyBPF(bpf_dsl::PolicyCompiler* compiler,
+ const std::vector<struct sock_filter>& program,
+ const bpf_dsl::Policy& policy,
+ const char** err);
+
+ // Evaluate a given BPF program for a particular set of system call
+ // parameters. If evaluation failed for any reason, "err" will be set to
+ // a non-NULL error string. Otherwise, the BPF program's result will be
+ // returned by the function and "err" is NULL.
+ // We do not actually implement the full BPF state machine, but only the
+ // parts that can actually be generated by our BPF compiler. If this code
+ // is used for purposes other than verifying the output of the sandbox's
+ // BPF compiler, we might have to extend this BPF interpreter.
+ static uint32_t EvaluateBPF(const std::vector<struct sock_filter>& program,
+ const struct arch_seccomp_data& data,
+ const char** err);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Verifier);
+ };
+
+} // namespace bpf_dsl
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_BPF_DSL_VERIFIER_H__
diff --git a/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc b/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc
new file mode 100644
index 0000000000..8c679a3d41
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc
@@ -0,0 +1,270 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf-helpers/baseline_policy.h"
+
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "base/logging.h"
+#include "build/build_config.h"
+#include "sandbox/linux/bpf_dsl/bpf_dsl.h"
+#include "sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h"
+#include "sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h"
+#include "sandbox/linux/seccomp-bpf-helpers/syscall_sets.h"
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+#include "sandbox/linux/services/syscall_wrappers.h"
+#include "sandbox/linux/system_headers/linux_syscalls.h"
+
+// Changing this implementation will have an effect on *all* policies.
+// Currently this means: Renderer/Worker, GPU, Flash and NaCl.
+
+using sandbox::bpf_dsl::Allow;
+using sandbox::bpf_dsl::Arg;
+using sandbox::bpf_dsl::Error;
+using sandbox::bpf_dsl::If;
+using sandbox::bpf_dsl::ResultExpr;
+
+namespace sandbox {
+
+namespace {
+
+ // Returns true if |sysno| belongs to one of the syscall groups that the
+ // baseline policy allows unconditionally. The ARM and MIPS private groups
+ // are only consulted on those architectures.
+ bool IsBaselinePolicyAllowed(int sysno) {
+ return SyscallSets::IsAllowedAddressSpaceAccess(sysno) ||
+ SyscallSets::IsAllowedBasicScheduler(sysno) ||
+ SyscallSets::IsAllowedEpoll(sysno) ||
+ SyscallSets::IsAllowedFileSystemAccessViaFd(sysno) ||
+ SyscallSets::IsAllowedFutex(sysno) ||
+ SyscallSets::IsAllowedGeneralIo(sysno) ||
+ SyscallSets::IsAllowedGetOrModifySocket(sysno) ||
+ SyscallSets::IsAllowedGettime(sysno) ||
+ SyscallSets::IsAllowedProcessStartOrDeath(sysno) ||
+ SyscallSets::IsAllowedSignalHandling(sysno) ||
+ SyscallSets::IsGetSimpleId(sysno) ||
+ SyscallSets::IsKernelInternalApi(sysno) ||
+ #if defined(__arm__)
+ SyscallSets::IsArmPrivate(sysno) ||
+ #endif
+ #if defined(__mips__)
+ SyscallSets::IsMipsPrivate(sysno) ||
+ #endif
+ SyscallSets::IsAllowedOperationOnFd(sysno);
+ }
+
+ // System calls that will trigger the crashing SIGSYS handler.
+ // Returns true if |sysno| belongs to one of the "watched" syscall groups.
+ // Several groups are only consulted on specific architectures.
+ bool IsBaselinePolicyWatched(int sysno) {
+ return SyscallSets::IsAdminOperation(sysno) ||
+ SyscallSets::IsAdvancedScheduler(sysno) ||
+ SyscallSets::IsAdvancedTimer(sysno) ||
+ SyscallSets::IsAsyncIo(sysno) ||
+ SyscallSets::IsDebug(sysno) ||
+ SyscallSets::IsEventFd(sysno) ||
+ SyscallSets::IsExtendedAttributes(sysno) ||
+ SyscallSets::IsFaNotify(sysno) ||
+ SyscallSets::IsFsControl(sysno) ||
+ SyscallSets::IsGlobalFSViewChange(sysno) ||
+ SyscallSets::IsGlobalProcessEnvironment(sysno) ||
+ SyscallSets::IsGlobalSystemStatus(sysno) ||
+ SyscallSets::IsInotify(sysno) ||
+ SyscallSets::IsKernelModule(sysno) ||
+ SyscallSets::IsKeyManagement(sysno) ||
+ SyscallSets::IsKill(sysno) ||
+ SyscallSets::IsMessageQueue(sysno) ||
+ SyscallSets::IsMisc(sysno) ||
+ #if defined(__x86_64__)
+ SyscallSets::IsNetworkSocketInformation(sysno) ||
+ #endif
+ SyscallSets::IsNuma(sysno) ||
+ SyscallSets::IsPrctl(sysno) ||
+ SyscallSets::IsProcessGroupOrSession(sysno) ||
+ #if defined(__i386__) || defined(__mips__)
+ SyscallSets::IsSocketCall(sysno) ||
+ #endif
+ #if defined(__arm__)
+ SyscallSets::IsArmPciConfig(sysno) ||
+ #endif
+ #if defined(__mips__)
+ SyscallSets::IsMipsMisc(sysno) ||
+ #endif
+ SyscallSets::IsTimer(sysno);
+ }
+
+ // Builds the baseline policy decision for system call |sysno|.
+ //
+ // |fs_denied_errno| is the errno return for denied filesystem access.
+ // |current_pid| is forwarded to the getpriority/setpriority and kill
+ // restrictions.
+ //
+ // The checks run in order and the first match wins, so the special cases
+ // placed before a group test take precedence over that group.
+ ResultExpr EvaluateSyscallImpl(int fs_denied_errno,
+ pid_t current_pid,
+ int sysno) {
+ #if defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || \
+ defined(MEMORY_SANITIZER)
+ // TCGETS is required by the sanitizers on failure.
+ if (sysno == __NR_ioctl) {
+ return RestrictIoctl();
+ }
+
+ if (sysno == __NR_sched_getaffinity) {
+ return Allow();
+ }
+
+ // Used when RSS limiting is enabled in sanitizers.
+ if (sysno == __NR_getrusage) {
+ return RestrictGetrusage();
+ }
+
+ if (sysno == __NR_sigaltstack) {
+ // Required for better stack overflow detection in ASan. Disallowed in
+ // non-ASan builds.
+ return Allow();
+ }
+ #endif // defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) ||
+ // defined(MEMORY_SANITIZER)
+
+ // Groups that are allowed without any argument filtering.
+ if (IsBaselinePolicyAllowed(sysno)) {
+ return Allow();
+ }
+
+ #if defined(OS_ANDROID)
+ // Needed for thread creation.
+ if (sysno == __NR_sigaltstack)
+ return Allow();
+ #endif
+
+ // Individual system calls that are allowed only with restricted arguments,
+ // or that are denied with a specific errno.
+ if (sysno == __NR_clock_gettime) {
+ return RestrictClockID();
+ }
+
+ if (sysno == __NR_clone) {
+ return RestrictCloneToThreadsAndEPERMFork();
+ }
+
+ if (sysno == __NR_fcntl)
+ return RestrictFcntlCommands();
+
+ #if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ if (sysno == __NR_fcntl64)
+ return RestrictFcntlCommands();
+ #endif
+
+ #if !defined(__aarch64__)
+ // fork() is never used as a system call (clone() is used instead), but we
+ // have seen it in fallback code on Android.
+ if (sysno == __NR_fork) {
+ return Error(EPERM);
+ }
+ #endif
+
+ if (sysno == __NR_futex)
+ return RestrictFutex();
+
+ if (sysno == __NR_set_robust_list)
+ return Error(EPERM);
+
+ if (sysno == __NR_getpriority || sysno ==__NR_setpriority)
+ return RestrictGetSetpriority(current_pid);
+
+ if (sysno == __NR_madvise) {
+ // Only allow MADV_DONTNEED (aka MADV_FREE).
+ // The advice value is argument index 2 of madvise().
+ const Arg<int> advice(2);
+ return If(advice == MADV_DONTNEED, Allow()).Else(Error(EPERM));
+ }
+
+ #if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \
+ defined(__aarch64__)
+ if (sysno == __NR_mmap)
+ return RestrictMmapFlags();
+ #endif
+
+ #if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ if (sysno == __NR_mmap2)
+ return RestrictMmapFlags();
+ #endif
+
+ if (sysno == __NR_mprotect)
+ return RestrictMprotectFlags();
+
+ if (sysno == __NR_prctl)
+ return RestrictPrctl();
+
+ #if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \
+ defined(__aarch64__)
+ if (sysno == __NR_socketpair) {
+ // Only allow AF_UNIX, PF_UNIX. Crash if anything else is seen.
+ static_assert(AF_UNIX == PF_UNIX,
+ "af_unix and pf_unix should not be different");
+ // The domain is argument index 0 of socketpair().
+ const Arg<int> domain(0);
+ return If(domain == AF_UNIX, Allow()).Else(CrashSIGSYS());
+ }
+ #endif
+
+ // This runs before the IsBaselinePolicyWatched() test below, which also
+ // matches IsKill(), so kill-type calls get the target restriction.
+ if (SyscallSets::IsKill(sysno)) {
+ return RestrictKillTarget(current_pid, sysno);
+ }
+
+ if (SyscallSets::IsFileSystem(sysno) ||
+ SyscallSets::IsCurrentDirectory(sysno)) {
+ return Error(fs_denied_errno);
+ }
+
+ if (SyscallSets::IsSeccomp(sysno))
+ return Error(EPERM);
+
+ if (SyscallSets::IsAnySystemV(sysno)) {
+ return Error(EPERM);
+ }
+
+ if (SyscallSets::IsUmask(sysno) ||
+ SyscallSets::IsDeniedFileSystemAccessViaFd(sysno) ||
+ SyscallSets::IsDeniedGetOrModifySocket(sysno) ||
+ SyscallSets::IsProcessPrivilegeChange(sysno)) {
+ return Error(EPERM);
+ }
+
+ #if defined(__i386__) || defined(__mips__)
+ if (SyscallSets::IsSocketCall(sysno))
+ return RestrictSocketcallCommand();
+ #endif
+
+ // Watched and unknown system calls currently get the same result; the
+ // separate branch only marks the groups named in the TODO below.
+ if (IsBaselinePolicyWatched(sysno)) {
+ // Previously unseen syscalls. TODO(jln): some of these should
+ // be denied gracefully right away.
+ return CrashSIGSYS();
+ }
+
+ // In any other case crash the program with our SIGSYS handler.
+ return CrashSIGSYS();
+ }
+
+} // namespace.
+
+ // Default constructor: delegates to the errno-taking constructor, so denied
+ // filesystem access is reported with EPERM.
+ BaselinePolicy::BaselinePolicy() : BaselinePolicy(EPERM) {}
+
+ // Stores |fs_denied_errno| and records the pid of the creating process
+ // (from sys_getpid()) so that later use from another process can be
+ // detected.
+ BaselinePolicy::BaselinePolicy(int fs_denied_errno)
+ : fs_denied_errno_(fs_denied_errno), policy_pid_(sys_getpid()) {
+ }
+
+ BaselinePolicy::~BaselinePolicy() {
+ // Make sure that this policy is created, used and destroyed by a single
+ // process: the current pid must match the one recorded at construction.
+ DCHECK_EQ(sys_getpid(), policy_pid_);
+ }
+
+ // Returns the baseline policy decision for |sysno|, using the errno and pid
+ // captured at construction time.
+ ResultExpr BaselinePolicy::EvaluateSyscall(int sysno) const {
+ // Sanity check that we're only called with valid syscall numbers.
+ DCHECK(SandboxBPF::IsValidSyscallNumber(sysno));
+ // Make sure that this policy is used in the creating process.
+ // NOTE(review): the pid is only compared when |sysno| is 1, presumably so
+ // the check runs once per pass over all system calls rather than for every
+ // number -- confirm.
+ if (1 == sysno) {
+ DCHECK_EQ(sys_getpid(), policy_pid_);
+ }
+ return EvaluateSyscallImpl(fs_denied_errno_, policy_pid_, sysno);
+ }
+
+ // Decision for invalid system call numbers: the crashing SIGSYS handler.
+ ResultExpr BaselinePolicy::InvalidSyscall() const {
+ return CrashSIGSYS();
+ }
+
+} // namespace sandbox.
diff --git a/sandbox/linux/seccomp-bpf-helpers/baseline_policy.h b/sandbox/linux/seccomp-bpf-helpers/baseline_policy.h
new file mode 100644
index 0000000000..4169d9c3e2
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf-helpers/baseline_policy.h
@@ -0,0 +1,48 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_HELPERS_BASELINE_POLICY_H_
+#define SANDBOX_LINUX_SECCOMP_BPF_HELPERS_BASELINE_POLICY_H_
+
+#include "sandbox/linux/bpf_dsl/bpf_dsl_forward.h"
+#include "sandbox/linux/bpf_dsl/policy.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+
+ // This is a helper to build seccomp-bpf policies, i.e. policies for a sandbox
+ // that reduces the Linux kernel's attack surface. Given its nature, it doesn't
+ // have clear semantics and is mostly "implementation-defined".
+ //
+ // This class implements the Policy interface with a "baseline"
+ // policy for use within Chromium.
+ // The "baseline" policy is somewhat arbitrary. All Chromium policies are an
+ // alteration of it, and it represents a reasonable common ground to run most
+ // code in a sandboxed environment.
+ // A baseline policy is only valid for the process for which this object was
+ // instantiated (so do not fork() and use it in a child).
+ class SANDBOX_EXPORT BaselinePolicy : public bpf_dsl::Policy {
+ public:
+ BaselinePolicy();
+ // |fs_denied_errno| is the errno returned when a filesystem access system
+ // call is denied.
+ explicit BaselinePolicy(int fs_denied_errno);
+ ~BaselinePolicy() override;
+
+ // Policy interface: the decision for a valid system call number, and the
+ // decision shared by all invalid system call numbers.
+ bpf_dsl::ResultExpr EvaluateSyscall(int system_call_number) const override;
+ bpf_dsl::ResultExpr InvalidSyscall() const override;
+ // Returns the pid this policy was created for.
+ pid_t policy_pid() const { return policy_pid_; }
+
+ private:
+ // The errno reported for denied filesystem access system calls.
+ int fs_denied_errno_;
+
+ // The PID that the policy applies to (should be equal to the current pid).
+ pid_t policy_pid_;
+
+ DISALLOW_COPY_AND_ASSIGN(BaselinePolicy);
+ };
+
+} // namespace sandbox.
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_HELPERS_BASELINE_POLICY_H_
diff --git a/sandbox/linux/seccomp-bpf-helpers/baseline_policy_unittest.cc b/sandbox/linux/seccomp-bpf-helpers/baseline_policy_unittest.cc
new file mode 100644
index 0000000000..614849f61c
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf-helpers/baseline_policy_unittest.cc
@@ -0,0 +1,334 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf-helpers/baseline_policy.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "base/files/scoped_file.h"
+#include "base/macros.h"
+#include "base/posix/eintr_wrapper.h"
+#include "base/threading/thread.h"
+#include "build/build_config.h"
+#include "sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h"
+#include "sandbox/linux/seccomp-bpf/bpf_tests.h"
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+#include "sandbox/linux/seccomp-bpf/syscall.h"
+#include "sandbox/linux/services/syscall_wrappers.h"
+#include "sandbox/linux/services/thread_helpers.h"
+#include "sandbox/linux/system_headers/linux_futex.h"
+#include "sandbox/linux/system_headers/linux_syscalls.h"
+#include "sandbox/linux/tests/test_utils.h"
+#include "sandbox/linux/tests/unit_tests.h"
+
+namespace sandbox {
+
+namespace {
+
+// Sends a four-byte message from |write_end| to |read_end| and checks that it
+// arrives intact. Doing so also verifies that read(), write() and fstat() are
+// allowed.
+void TestPipeOrSocketPair(base::ScopedFD read_end, base::ScopedFD write_end) {
+  // Both descriptors must be valid.
+  BPF_ASSERT_LE(0, read_end.get());
+  BPF_ASSERT_LE(0, write_end.get());
+
+  // The reading end has to be either a FIFO or a socket.
+  struct stat read_end_info;
+  const int fstat_result = fstat(read_end.get(), &read_end_info);
+  BPF_ASSERT_EQ(0, fstat_result);
+  const bool is_fifo = S_ISFIFO(read_end_info.st_mode);
+  const bool is_socket = S_ISSOCK(read_end_info.st_mode);
+  BPF_ASSERT(is_fifo || is_socket);
+
+  const ssize_t kPayloadSize = 4;
+  static const char kPayload[kPayloadSize] = {'T', 'E', 'S', 'T'};
+
+  const ssize_t bytes_written =
+      HANDLE_EINTR(write(write_end.get(), kPayload, kPayloadSize));
+  BPF_ASSERT_EQ(kPayloadSize, bytes_written);
+
+  // The receive buffer is one byte larger than the payload, so reading more
+  // than was written would be detected by the size check below.
+  char received[kPayloadSize + 1] = {0};
+  const ssize_t bytes_read =
+      HANDLE_EINTR(read(read_end.get(), received, sizeof(received)));
+  BPF_ASSERT_EQ(kPayloadSize, bytes_read);
+  BPF_ASSERT_EQ(0, memcmp(kPayload, received, kPayloadSize));
+}
+
+// Smoke test: a handful of easy-to-exercise system calls must keep working
+// under the baseline policy.
+BPF_TEST_C(BaselinePolicy, BaselinePolicyBasicAllowed, BaselinePolicy) {
+  BPF_ASSERT_EQ(0, sched_yield());
+
+  // pipe() must succeed and the resulting descriptors must be usable.
+  int fds[2];
+  const int pipe_result = pipe(fds);
+  BPF_ASSERT_EQ(0, pipe_result);
+  TestPipeOrSocketPair(base::ScopedFD(fds[0]), base::ScopedFD(fds[1]));
+
+  BPF_ASSERT_LE(1, getpid());
+  BPF_ASSERT_LE(0, getuid());
+}
+
+// fchmod() on an invalid descriptor must fail with EPERM under the policy.
+BPF_TEST_C(BaselinePolicy, FchmodErrno, BaselinePolicy) {
+  const int kInvalidFd = -1;
+  const int fchmod_result = fchmod(kInvalidFd, 07777);
+  BPF_ASSERT_EQ(-1, fchmod_result);
+  // An unsandboxed process would see EBADF here instead.
+  BPF_ASSERT_EQ(EPERM, errno);
+}
+
+// fork() must fail with EPERM instead of creating a child.
+BPF_TEST_C(BaselinePolicy, ForkErrno, BaselinePolicy) {
+  errno = 0;
+  const pid_t child = fork();
+  // Capture errno right away, before any later call can overwrite it.
+  const int errno_after_fork = errno;
+  TestUtils::HandlePostForkReturn(child);
+
+  BPF_ASSERT_EQ(-1, child);
+  BPF_ASSERT_EQ(EPERM, errno_after_fork);
+}
+
+// Calls clone() with CLONE_PARENT_SETTID | SIGCHLD.
+// NOTE(review): presumably the flag set glibc's fork() uses on x86 — confirm.
+pid_t ForkX86Glibc() {
+  static pid_t parent_tid;
+  const unsigned long kCloneFlags = CLONE_PARENT_SETTID | SIGCHLD;
+  return sys_clone(kCloneFlags, nullptr, &parent_tid, nullptr, nullptr);
+}
+
+// The clone() call issued by ForkX86Glibc() must fail with EPERM.
+BPF_TEST_C(BaselinePolicy, ForkX86Eperm, BaselinePolicy) {
+  errno = 0;
+  const pid_t child = ForkX86Glibc();
+  // Capture errno right away, before any later call can overwrite it.
+  const int errno_after_clone = errno;
+  TestUtils::HandlePostForkReturn(child);
+
+  BPF_ASSERT_EQ(-1, child);
+  BPF_ASSERT_EQ(EPERM, errno_after_clone);
+}
+
+// Calls clone() with CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | SIGCHLD.
+// NOTE(review): presumably the flag set glibc's fork() uses on ARM — confirm.
+pid_t ForkARMGlibc() {
+  static pid_t child_tid;
+  const unsigned long kCloneFlags =
+      CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | SIGCHLD;
+  return sys_clone(kCloneFlags, nullptr, nullptr, &child_tid, nullptr);
+}
+
+// The clone() call issued by ForkARMGlibc() must fail with EPERM.
+BPF_TEST_C(BaselinePolicy, ForkArmEperm, BaselinePolicy) {
+  errno = 0;
+  const pid_t child = ForkARMGlibc();
+  // Capture errno right away, before any later call can overwrite it.
+  const int errno_after_clone = errno;
+  TestUtils::HandlePostForkReturn(child);
+
+  BPF_ASSERT_EQ(-1, child);
+  BPF_ASSERT_EQ(EPERM, errno_after_clone);
+}
+
+// Starting a base::Thread must succeed under the baseline policy.
+BPF_TEST_C(BaselinePolicy, CreateThread, BaselinePolicy) {
+  base::Thread worker("sandbox_tests");
+  const bool started = worker.Start();
+  BPF_ASSERT(started);
+}
+
+// clone() with CLONE_THREAD | SIGCHLD is expected to crash the process with
+// the clone-specific error message.
+BPF_DEATH_TEST_C(BaselinePolicy,
+                 DisallowedCloneFlagCrashes,
+                 DEATH_SEGV_MESSAGE(GetCloneErrorMessageContentForTests()),
+                 BaselinePolicy) {
+  const pid_t clone_result = sys_clone(CLONE_THREAD | SIGCHLD);
+  TestUtils::HandlePostForkReturn(clone_result);
+}
+
+// kill() aimed at a process other than ourselves (here pid 1) is expected to
+// crash the process with the kill-specific error message.
+BPF_DEATH_TEST_C(BaselinePolicy,
+                 DisallowedKillCrashes,
+                 DEATH_SEGV_MESSAGE(GetKillErrorMessageContentForTests()),
+                 BaselinePolicy) {
+  const pid_t kOtherPid = 1;
+  // The test only makes sense if pid 1 is not this very process.
+  BPF_ASSERT_NE(kOtherPid, getpid());
+  kill(kOtherPid, 0);
+  _exit(0);
+}
+
+// Sending signal 0 to our own pid must be allowed.
+BPF_TEST_C(BaselinePolicy, CanKillSelf, BaselinePolicy) {
+  const pid_t self = getpid();
+  const int kill_result = kill(self, 0);
+  BPF_ASSERT_EQ(0, kill_result);
+}
+
+// AF_UNIX socketpair() must work for both datagram and seqpacket sockets, and
+// the resulting sockets must be able to carry data.
+BPF_TEST_C(BaselinePolicy, Socketpair, BaselinePolicy) {
+  const int kSocketTypes[] = {SOCK_DGRAM, SOCK_SEQPACKET};
+  for (const int socket_type : kSocketTypes) {
+    int sockets[2];
+    const int socketpair_result =
+        socketpair(AF_UNIX, socket_type, 0, sockets);
+    BPF_ASSERT_EQ(0, socketpair_result);
+    TestPipeOrSocketPair(base::ScopedFD(sockets[0]),
+                         base::ScopedFD(sockets[1]));
+  }
+}
+
+// The socketpair() domain can only be restricted on some architectures, so
+// this death test is limited to those.
+#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
+BPF_DEATH_TEST_C(BaselinePolicy,
+                 SocketpairWrongDomain,
+                 DEATH_SEGV_MESSAGE(GetErrorMessageContentForTests()),
+                 BaselinePolicy) {
+  // An AF_INET socketpair() is expected to crash before returning.
+  int sockets[2];
+  ignore_result(socketpair(AF_INET, SOCK_STREAM, 0, sockets));
+  _exit(1);
+}
+#endif  // defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
+
+// open() must be denied with EPERM.
+BPF_TEST_C(BaselinePolicy, EPERM_open, BaselinePolicy) {
+  errno = 0;
+  const int fd = open("/proc/cpuinfo", O_RDONLY);
+  BPF_ASSERT_EQ(-1, fd);
+  BPF_ASSERT_EQ(EPERM, errno);
+}
+
+// access() must be denied with EPERM.
+BPF_TEST_C(BaselinePolicy, EPERM_access, BaselinePolicy) {
+  errno = 0;
+  const int access_result = access("/proc/cpuinfo", R_OK);
+  BPF_ASSERT_EQ(-1, access_result);
+  BPF_ASSERT_EQ(EPERM, errno);
+}
+
+// getcwd() must fail (return a null pointer) with EPERM.
+BPF_TEST_C(BaselinePolicy, EPERM_getcwd, BaselinePolicy) {
+  errno = 0;
+  char path_buffer[1024];
+  const char* const getcwd_result = getcwd(path_buffer, sizeof(path_buffer));
+  BPF_ASSERT(getcwd_result == nullptr);
+  BPF_ASSERT_EQ(EPERM, errno);
+}
+
+// Issuing an invalid system call is expected to crash the process with the
+// generic seccomp-bpf error message.
+BPF_DEATH_TEST_C(BaselinePolicy,
+                 SIGSYS_InvalidSyscall,
+                 DEATH_SEGV_MESSAGE(GetErrorMessageContentForTests()),
+                 BaselinePolicy) {
+  Syscall::InvalidCall();
+}
+
+// Defines a death test, SIGSYS_<sysno>, which issues syscall |sysno| with "0"
+// as every argument and expects the generic seccomp-bpf crash message.
+// A failing test could be problematic: the kernel could SIGSEGV the process or
+// the system call itself could reboot the machine. Some thought has been given
+// to hand-picking the system calls below so as to limit any potential side
+// effects outside of the current process.
+#define TEST_BASELINE_SIGSYS(sysno) \
+ BPF_DEATH_TEST_C(BaselinePolicy, \
+ SIGSYS_##sysno, \
+ DEATH_SEGV_MESSAGE(GetErrorMessageContentForTests()), \
+ BaselinePolicy) { \
+ syscall(sysno, 0, 0, 0, 0, 0, 0); \
+ _exit(1); \
+ }
+
+TEST_BASELINE_SIGSYS(__NR_acct);
+TEST_BASELINE_SIGSYS(__NR_chroot);
+TEST_BASELINE_SIGSYS(__NR_fanotify_init);
+TEST_BASELINE_SIGSYS(__NR_fgetxattr);
+TEST_BASELINE_SIGSYS(__NR_getcpu);
+TEST_BASELINE_SIGSYS(__NR_getitimer);
+TEST_BASELINE_SIGSYS(__NR_init_module);
+TEST_BASELINE_SIGSYS(__NR_io_cancel);
+TEST_BASELINE_SIGSYS(__NR_keyctl);
+TEST_BASELINE_SIGSYS(__NR_mq_open);
+TEST_BASELINE_SIGSYS(__NR_ptrace);
+TEST_BASELINE_SIGSYS(__NR_sched_setaffinity);
+TEST_BASELINE_SIGSYS(__NR_setpgid);
+TEST_BASELINE_SIGSYS(__NR_swapon);
+TEST_BASELINE_SIGSYS(__NR_sysinfo);
+TEST_BASELINE_SIGSYS(__NR_syslog);
+TEST_BASELINE_SIGSYS(__NR_timer_create);
+
+#if !defined(__aarch64__)  // NOTE(review): presumably absent on aarch64 — confirm.
+TEST_BASELINE_SIGSYS(__NR_eventfd);
+TEST_BASELINE_SIGSYS(__NR_inotify_init);
+TEST_BASELINE_SIGSYS(__NR_vserver);
+#endif  // !defined(__aarch64__)
+
+// futex() with FUTEX_CMP_REQUEUE_PI is expected to crash the process with the
+// futex-specific error message.
+BPF_DEATH_TEST_C(BaselinePolicy,
+                 FutexWithRequeuePriorityInheritence,
+                 DEATH_SEGV_MESSAGE(GetFutexErrorMessageContentForTests()),
+                 BaselinePolicy) {
+  const int kDisallowedFutexOp = FUTEX_CMP_REQUEUE_PI;
+  syscall(__NR_futex, NULL, kDisallowedFutexOp, 0, NULL, NULL, 0);
+  _exit(1);
+}
+
+// futex() with FUTEX_CMP_REQUEUE_PI_PRIVATE is expected to crash the process
+// with the futex-specific error message.
+BPF_DEATH_TEST_C(BaselinePolicy,
+                 FutexWithRequeuePriorityInheritencePrivate,
+                 DEATH_SEGV_MESSAGE(GetFutexErrorMessageContentForTests()),
+                 BaselinePolicy) {
+  const int kDisallowedFutexOp = FUTEX_CMP_REQUEUE_PI_PRIVATE;
+  syscall(__NR_futex, NULL, kDisallowedFutexOp, 0, NULL, NULL, 0);
+  _exit(1);
+}
+
+// futex() with FUTEX_UNLOCK_PI_PRIVATE is expected to crash the process with
+// the futex-specific error message.
+BPF_DEATH_TEST_C(BaselinePolicy,
+                 FutexWithUnlockPIPrivate,
+                 DEATH_SEGV_MESSAGE(GetFutexErrorMessageContentForTests()),
+                 BaselinePolicy) {
+  const int kDisallowedFutexOp = FUTEX_UNLOCK_PI_PRIVATE;
+  syscall(__NR_futex, NULL, kDisallowedFutexOp, 0, NULL, NULL, 0);
+  _exit(1);
+}
+
+// PR_GET_DUMPABLE and PR_SET_DUMPABLE must both be allowed: read the current
+// "dumpable" attribute and write the very same value back.
+BPF_TEST_C(BaselinePolicy, PrctlDumpable, BaselinePolicy) {
+  const int is_dumpable = prctl(PR_GET_DUMPABLE, 0, 0, 0, 0);
+  BPF_ASSERT(is_dumpable == 1 || is_dumpable == 0);
+  // prctl() takes an option plus at most four arguments; the original call
+  // passed a stray sixth argument.
+  const int prctl_ret = prctl(PR_SET_DUMPABLE, is_dumpable, 0, 0, 0);
+  BPF_ASSERT_EQ(0, prctl_ret);
+}
+
+// Work around incomplete Android headers that lack PR_CAPBSET_READ.
+#ifndef PR_CAPBSET_READ
+#define PR_CAPBSET_READ 23
+#endif
+
+// prctl(PR_CAPBSET_READ) is expected to crash the process with the
+// prctl-specific error message.
+BPF_DEATH_TEST_C(BaselinePolicy,
+                 PrctlSigsys,
+                 DEATH_SEGV_MESSAGE(GetPrctlErrorMessageContentForTests()),
+                 BaselinePolicy) {
+  const int kDisallowedOption = PR_CAPBSET_READ;
+  prctl(kDisallowedOption, 0, 0, 0, 0);
+  _exit(1);
+}
+
+// getpriority()/setpriority() with PRIO_PROCESS must work when aimed at this
+// process (either as 0 or as its pid) and fail with EPERM for another pid.
+BPF_TEST_C(BaselinePolicy, GetOrSetPriority, BaselinePolicy) {
+  const pid_t self = getpid();
+  const pid_t other = self + 1;
+
+  // getpriority() can legitimately return -1, so success is detected through
+  // errno rather than through the return value.
+  errno = 0;
+  const int saved_priority = getpriority(PRIO_PROCESS, 0);
+  BPF_ASSERT_EQ(0, errno);
+
+  errno = 0;
+  ignore_result(getpriority(PRIO_PROCESS, self));
+  BPF_ASSERT_EQ(0, errno);
+
+  const int get_other_result = getpriority(PRIO_PROCESS, other);
+  BPF_ASSERT_EQ(-1, get_other_result);
+  BPF_ASSERT_EQ(EPERM, errno);
+
+  const int set_default_result = setpriority(PRIO_PROCESS, 0, saved_priority);
+  BPF_ASSERT_EQ(0, set_default_result);
+
+  const int set_self_result = setpriority(PRIO_PROCESS, self, saved_priority);
+  BPF_ASSERT_EQ(0, set_self_result);
+
+  errno = 0;
+  const int set_other_result =
+      setpriority(PRIO_PROCESS, other, saved_priority);
+  BPF_ASSERT_EQ(-1, set_other_result);
+  BPF_ASSERT_EQ(EPERM, errno);
+}
+
+// getpriority() with a |which| other than PRIO_PROCESS (here PRIO_USER) is
+// expected to crash the process with the generic error message.
+BPF_DEATH_TEST_C(BaselinePolicy,
+                 GetPrioritySigsys,
+                 DEATH_SEGV_MESSAGE(GetErrorMessageContentForTests()),
+                 BaselinePolicy) {
+  const int kDisallowedWhich = PRIO_USER;
+  getpriority(kDisallowedWhich, 0);
+  _exit(1);
+}
+
+// clock_gettime() with CLOCK_MONOTONIC_RAW is expected to crash the process
+// with the generic error message.
+BPF_DEATH_TEST_C(BaselinePolicy,
+                 ClockGettimeWithDisallowedClockCrashes,
+                 DEATH_SEGV_MESSAGE(sandbox::GetErrorMessageContentForTests()),
+                 BaselinePolicy) {
+  struct timespec unused_time;
+  clock_gettime(CLOCK_MONOTONIC_RAW, &unused_time);
+}
+
+} // namespace
+
+} // namespace sandbox
diff --git a/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.cc b/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.cc
new file mode 100644
index 0000000000..05250d147f
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.cc
@@ -0,0 +1,297 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Note: any code in this file MUST be async-signal safe.
+
+#include "sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h"
+
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "base/logging.h"
+#include "base/posix/eintr_wrapper.h"
+#include "build/build_config.h"
+#include "sandbox/linux/bpf_dsl/bpf_dsl.h"
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+#include "sandbox/linux/seccomp-bpf/syscall.h"
+#include "sandbox/linux/services/syscall_wrappers.h"
+#include "sandbox/linux/system_headers/linux_syscalls.h"
+
+#if defined(__mips__)
+// __NR_Linux, is defined in <asm/unistd.h>.
+#include <asm/unistd.h>
+#endif
+
+#define SECCOMP_MESSAGE_COMMON_CONTENT "seccomp-bpf failure"
+#define SECCOMP_MESSAGE_CLONE_CONTENT "clone() failure"
+#define SECCOMP_MESSAGE_PRCTL_CONTENT "prctl() failure"
+#define SECCOMP_MESSAGE_IOCTL_CONTENT "ioctl() failure"
+#define SECCOMP_MESSAGE_KILL_CONTENT "(tg)kill() failure"
+#define SECCOMP_MESSAGE_FUTEX_CONTENT "futex() failure"
+
+namespace {
+
+// Reports whether this translation unit was compiled for x86-64.
+inline bool IsArchitectureX86_64() {
+#if defined(__x86_64__)
+  const bool kIsX86_64 = true;
+#else
+  const bool kIsX86_64 = false;
+#endif
+  return kIsX86_64;
+}
+
+// Write |error_message| to stderr. Similar to RawLog(), but a bit more careful
+// about async-signal safety. |size| is the number of bytes to write and should
+// typically not include a terminating \0.
+// Short writes are retried until everything has been written; any error (or a
+// nonsensical return value) silently ends the attempt.
+void WriteToStdErr(const char* error_message, size_t size) {
+  while (size > 0) {
+    // TODO(jln): query the current policy to check if send() is available and
+    // use it to perform a non-blocking write.
+    // write() returns ssize_t; keep its full width instead of narrowing the
+    // byte count to int.
+    const ssize_t ret =
+        HANDLE_EINTR(write(STDERR_FILENO, error_message, size));
+    // We can't handle any type of error here.
+    if (ret <= 0 || static_cast<size_t>(ret) > size)
+      break;
+    size -= ret;
+    error_message += ret;
+  }
+}
+
+// Maps |sysno| to its offset from the architecture's syscall base and returns
+// a value between 0 and 1023; anything outside that range becomes zero.
+// Where the base is zero (Intel and Arm) the offset is the syscall number
+// itself. On Mips the base is __NR_Linux, so valid syscall numbers are reduced
+// to their offset from it (the valid O32 __NR_syscall thereby ends up as zero,
+// for simplicity).
+uint32_t SyscallNumberToOffsetFromBase(uint32_t sysno) {
+#if defined(__mips__)
+  const uint32_t offset = sysno - __NR_Linux;
+#else
+  const uint32_t offset = sysno;
+#endif
+  return offset < 1024 ? offset : 0;
+}
+
+// Reports, on stderr and in an async-signal safe way, that seccomp-bpf failed
+// to handle |sysno|. The number is printed as exactly four zero-padded decimal
+// digits; values of 1024 and above are printed as 0000.
+void PrintSyscallError(uint32_t sysno) {
+  // TODO(markus): replace with async-signal safe snprintf when available.
+  const size_t kNumDigits = 4;
+  char digits[kNumDigits];
+  uint32_t remaining = sysno < 1024 ? sysno : 0;
+  // Fill the buffer from the least significant digit backwards.
+  size_t pos = kNumDigits;
+  while (pos > 0) {
+    --pos;
+    digits[pos] = static_cast<char>('0' + remaining % 10);
+    remaining /= 10;
+  }
+
+#if defined(__mips__) && (_MIPS_SIM == _MIPS_SIM_ABI32)
+  static const char kPrefix[] = __FILE__
+      ":**CRASHING**:" SECCOMP_MESSAGE_COMMON_CONTENT " in syscall 4000 + ";
+#else
+  static const char kPrefix[] =
+      __FILE__ ":**CRASHING**:" SECCOMP_MESSAGE_COMMON_CONTENT " in syscall ";
+#endif
+  static const char kPostfix[] = "\n";
+
+  // The string constants are written without their terminating \0.
+  WriteToStdErr(kPrefix, sizeof(kPrefix) - 1);
+  WriteToStdErr(digits, sizeof(digits));
+  WriteToStdErr(kPostfix, sizeof(kPostfix) - 1);
+}
+
+} // namespace.
+
+namespace sandbox {
+
+// Prints the offending syscall number to stderr, then crashes on purpose by
+// writing to an address derived from the syscall number and the low 8 bits of
+// its first two arguments. Never returns.
+intptr_t CrashSIGSYS_Handler(const struct arch_seccomp_data& args, void* aux) {
+  uint32_t fault_value = SyscallNumberToOffsetFromBase(args.nr);
+  PrintSyscallError(fault_value);
+
+  // Fold 8 bits of each of the first two arguments into the address, so that
+  // the socket type, the fcntl command, etc. can be told apart without being
+  // likely to hit a mapped address.
+  // Do not encode more bits here without thinking about increasing the
+  // likelihood of collision with mapped pages.
+  fault_value |= ((args.args[0] & 0xffUL) << 12);
+  fault_value |= ((args.args[1] & 0xffUL) << 20);
+
+  // Deliberately write through the value as if it were an address, so that it
+  // shows up very clearly and easily in crash dumps.
+  volatile char* fault_addr = reinterpret_cast<volatile char*>(fault_value);
+  *fault_addr = '\0';
+
+  // Paranoia: should that address have been mapped, retry in the null page
+  // using the syscall number alone.
+  fault_value &= 0xfffUL;
+  fault_addr = reinterpret_cast<volatile char*>(fault_value);
+  *fault_addr = '\0';
+
+  while (true)
+    _exit(1);
+}
+
+// TODO(jln): refactor the reporting functions.
+
+// Reports a disallowed clone() on stderr and crashes at an address derived
+// from the clone flags. Never returns.
+intptr_t SIGSYSCloneFailure(const struct arch_seccomp_data& args, void* aux) {
+  static const char kMessage[] =
+      __FILE__ ":**CRASHING**:" SECCOMP_MESSAGE_CLONE_CONTENT "\n";
+  WriteToStdErr(kMessage, sizeof(kMessage) - 1);
+
+  // "flags" is the first argument in the kernel's clone().
+  // It is volatile so that the value can be found on the stack in a minidump.
+  volatile uint64_t flags_value = args.args[0];
+
+  if (IsArchitectureX86_64()) {
+    volatile char* wide_addr =
+        reinterpret_cast<volatile char*>(flags_value & 0xFFFFFF);
+    *wide_addr = '\0';
+  }
+
+  // Hit the NULL page if the store above did not fault.
+  volatile char* null_page_addr =
+      reinterpret_cast<volatile char*>(flags_value & 0xFFF);
+  *null_page_addr = '\0';
+
+  while (true)
+    _exit(1);
+}
+
+// Reports a disallowed prctl() on stderr and crashes at the address
+// (option & 0xFFF). Never returns.
+intptr_t SIGSYSPrctlFailure(const struct arch_seccomp_data& args,
+                            void* /* aux */) {
+  static const char kMessage[] =
+      __FILE__ ":**CRASHING**:" SECCOMP_MESSAGE_PRCTL_CONTENT "\n";
+  WriteToStdErr(kMessage, sizeof(kMessage) - 1);
+
+  // Volatile so that the value can be found on the stack in a minidump.
+  volatile uint64_t prctl_option = args.args[0];
+  volatile char* fault_addr =
+      reinterpret_cast<volatile char*>(prctl_option & 0xFFF);
+  *fault_addr = '\0';
+
+  while (true)
+    _exit(1);
+}
+
+// Reports a disallowed ioctl() on stderr and crashes at an address derived
+// from the ioctl request. Never returns.
+intptr_t SIGSYSIoctlFailure(const struct arch_seccomp_data& args,
+                            void* /* aux */) {
+  static const char kMessage[] =
+      __FILE__ ":**CRASHING**:" SECCOMP_MESSAGE_IOCTL_CONTENT "\n";
+  WriteToStdErr(kMessage, sizeof(kMessage) - 1);
+
+  // Volatile so that "request" can be seen on the stack in a minidump.
+  volatile uint64_t ioctl_request = args.args[1];
+  volatile char* wide_addr =
+      reinterpret_cast<volatile char*>(ioctl_request & 0xFFFF);
+  *wide_addr = '\0';
+
+  // Hit the NULL page if the store above did not fault.
+  volatile char* null_page_addr =
+      reinterpret_cast<volatile char*>(ioctl_request & 0xFFF);
+  *null_page_addr = '\0';
+
+  while (true)
+    _exit(1);
+}
+
+// Reports a disallowed kill()/tgkill() on stderr and crashes at the address
+// (pid & 0xFFF), where pid is the value returned by sys_getpid(). Never
+// returns.
+intptr_t SIGSYSKillFailure(const struct arch_seccomp_data& args,
+                           void* /* aux */) {
+  static const char kMessage[] =
+      __FILE__ ":**CRASHING**:" SECCOMP_MESSAGE_KILL_CONTENT "\n";
+  WriteToStdErr(kMessage, sizeof(kMessage) - 1);
+
+  // Volatile so that the pid can be seen on the stack in a minidump.
+  volatile uint64_t current_pid = sys_getpid();
+  volatile char* fault_addr =
+      reinterpret_cast<volatile char*>(current_pid & 0xFFF);
+  *fault_addr = '\0';
+
+  while (true)
+    _exit(1);
+}
+
+// Reports a disallowed futex() on stderr and crashes at the address
+// (op & 0xFFF), where op is the second syscall argument. Never returns.
+intptr_t SIGSYSFutexFailure(const struct arch_seccomp_data& args,
+                            void* /* aux */) {
+  static const char kMessage[] =
+      __FILE__ ":**CRASHING**:" SECCOMP_MESSAGE_FUTEX_CONTENT "\n";
+  WriteToStdErr(kMessage, sizeof(kMessage) - 1);
+
+  volatile int op_value = args.args[1];
+  volatile char* fault_addr =
+      reinterpret_cast<volatile char*>(op_value & 0xFFF);
+  *fault_addr = '\0';
+
+  while (true)
+    _exit(1);
+}
+
+// Handles trapped sched_*() calls. If the call targets the current thread id,
+// it is re-issued with a pid of 0 and its result is returned; in every other
+// case the process is crashed through CrashSIGSYS_Handler().
+intptr_t SIGSYSSchedHandler(const struct arch_seccomp_data& args,
+                            void* aux) {
+  switch (args.nr) {
+    case __NR_sched_getaffinity:
+    case __NR_sched_getattr:
+    case __NR_sched_getparam:
+    case __NR_sched_getscheduler:
+    case __NR_sched_rr_get_interval:
+    case __NR_sched_setaffinity:
+    case __NR_sched_setattr:
+    case __NR_sched_setparam:
+    case __NR_sched_setscheduler: {
+      // The first argument is the pid. Only a call aimed at our own thread id
+      // is rewritten; anything else falls through to the crash below.
+      const pid_t current_tid = sys_gettid();
+      if (args.args[0] != static_cast<uint64_t>(current_tid))
+        break;
+      // A pid of 0 is equivalent and allowed by the policy.
+      return Syscall::Call(args.nr,
+                           0,
+                           static_cast<intptr_t>(args.args[1]),
+                           static_cast<intptr_t>(args.args[2]),
+                           static_cast<intptr_t>(args.args[3]),
+                           static_cast<intptr_t>(args.args[4]),
+                           static_cast<intptr_t>(args.args[5]));
+    }
+    default:
+      break;
+  }
+
+  CrashSIGSYS_Handler(args, aux);
+
+  // Should never be reached.
+  RAW_CHECK(false);
+  return -ENOSYS;
+}
+
+// Each helper below wraps one SIGSYS handler in a bpf_dsl Trap() result, with
+// no auxiliary data attached.
+
+// Trap into CrashSIGSYS_Handler().
+bpf_dsl::ResultExpr CrashSIGSYS() {
+  return bpf_dsl::Trap(CrashSIGSYS_Handler, nullptr);
+}
+
+// Trap into SIGSYSCloneFailure().
+bpf_dsl::ResultExpr CrashSIGSYSClone() {
+  return bpf_dsl::Trap(SIGSYSCloneFailure, nullptr);
+}
+
+// Trap into SIGSYSPrctlFailure().
+bpf_dsl::ResultExpr CrashSIGSYSPrctl() {
+  return bpf_dsl::Trap(SIGSYSPrctlFailure, nullptr);
+}
+
+// Trap into SIGSYSIoctlFailure().
+bpf_dsl::ResultExpr CrashSIGSYSIoctl() {
+  return bpf_dsl::Trap(SIGSYSIoctlFailure, nullptr);
+}
+
+// Trap into SIGSYSKillFailure().
+bpf_dsl::ResultExpr CrashSIGSYSKill() {
+  return bpf_dsl::Trap(SIGSYSKillFailure, nullptr);
+}
+
+// Trap into SIGSYSFutexFailure().
+bpf_dsl::ResultExpr CrashSIGSYSFutex() {
+  return bpf_dsl::Trap(SIGSYSFutexFailure, nullptr);
+}
+
+// Trap into SIGSYSSchedHandler().
+bpf_dsl::ResultExpr RewriteSchedSIGSYS() {
+  return bpf_dsl::Trap(SIGSYSSchedHandler, nullptr);
+}
+
+// The accessors below expose, for use in tests, the message fragments that the
+// SIGSYS handlers in this file write to stderr.
+
+// Fragment printed by PrintSyscallError().
+const char* GetErrorMessageContentForTests() {
+  static const char kContent[] = SECCOMP_MESSAGE_COMMON_CONTENT;
+  return kContent;
+}
+
+// Fragment printed by SIGSYSCloneFailure().
+const char* GetCloneErrorMessageContentForTests() {
+  static const char kContent[] = SECCOMP_MESSAGE_CLONE_CONTENT;
+  return kContent;
+}
+
+// Fragment printed by SIGSYSPrctlFailure().
+const char* GetPrctlErrorMessageContentForTests() {
+  static const char kContent[] = SECCOMP_MESSAGE_PRCTL_CONTENT;
+  return kContent;
+}
+
+// Fragment printed by SIGSYSIoctlFailure().
+const char* GetIoctlErrorMessageContentForTests() {
+  static const char kContent[] = SECCOMP_MESSAGE_IOCTL_CONTENT;
+  return kContent;
+}
+
+// Fragment printed by SIGSYSKillFailure().
+const char* GetKillErrorMessageContentForTests() {
+  static const char kContent[] = SECCOMP_MESSAGE_KILL_CONTENT;
+  return kContent;
+}
+
+// Fragment printed by SIGSYSFutexFailure().
+const char* GetFutexErrorMessageContentForTests() {
+  static const char kContent[] = SECCOMP_MESSAGE_FUTEX_CONTENT;
+  return kContent;
+}
+
+} // namespace sandbox.
diff --git a/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h b/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h
new file mode 100644
index 0000000000..c64e994172
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h
@@ -0,0 +1,82 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SIGSYS_HANDLERS_H_
+#define SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SIGSYS_HANDLERS_H_
+
+#include <stdint.h>
+
+#include "build/build_config.h"
+#include "sandbox/linux/bpf_dsl/bpf_dsl_forward.h"
+#include "sandbox/sandbox_export.h"
+
+// The handlers are suitable for use in Trap() error codes. They are
+// guaranteed to be async-signal safe.
+// See sandbox/linux/seccomp-bpf/trap.h to see how they work.
+
+namespace sandbox {
+
+struct arch_seccomp_data;
+
+// This handler will crash the currently running process. The crashing address
+// will be the number of the current system call, extracted from |args|.
+// This handler will also print to stderr the number of the crashing syscall.
+SANDBOX_EXPORT intptr_t
+ CrashSIGSYS_Handler(const struct arch_seccomp_data& args, void* aux);
+
+// The following handlers are suitable to report failures with the clone(),
+// prctl(), ioctl(), kill()/tgkill() and futex() system calls respectively.
+
+// The crashing address will be (clone_flags & 0xFFFFFF), where clone_flags is
+// the clone(2) argument, extracted from |args|.
+SANDBOX_EXPORT intptr_t
+ SIGSYSCloneFailure(const struct arch_seccomp_data& args, void* aux);
+// The crashing address will be (option & 0xFFF), where option is the prctl(2)
+// argument.
+SANDBOX_EXPORT intptr_t
+ SIGSYSPrctlFailure(const struct arch_seccomp_data& args, void* aux);
+// The crashing address will be request & 0xFFFF, where request is the ioctl(2)
+// argument.
+SANDBOX_EXPORT intptr_t
+ SIGSYSIoctlFailure(const struct arch_seccomp_data& args, void* aux);
+// The crashing address will be (pid & 0xFFF), where pid is the pid of the
+// current process (as returned by sys_getpid()), not the syscall argument.
+SANDBOX_EXPORT intptr_t
+ SIGSYSKillFailure(const struct arch_seccomp_data& args, void* aux);
+// The crashing address will be (op & 0xFFF), where op is the second
+// argument.
+SANDBOX_EXPORT intptr_t
+ SIGSYSFutexFailure(const struct arch_seccomp_data& args, void* aux);
+// If the syscall is not being called on the current tid, crashes in the same
+// way as CrashSIGSYS_Handler. Otherwise, returns the result of calling the
+// syscall with the pid argument set to 0 (which for these calls means the
+// current thread). The following syscalls are supported:
+//
+// sched_getaffinity(), sched_getattr(), sched_getparam(), sched_getscheduler(),
+// sched_rr_get_interval(), sched_setaffinity(), sched_setattr(),
+// sched_setparam(), sched_setscheduler()
+SANDBOX_EXPORT intptr_t
+ SIGSYSSchedHandler(const struct arch_seccomp_data& args, void* aux);
+
+// Variants of the above functions for use with bpf_dsl.
+SANDBOX_EXPORT bpf_dsl::ResultExpr CrashSIGSYS();
+SANDBOX_EXPORT bpf_dsl::ResultExpr CrashSIGSYSClone();
+SANDBOX_EXPORT bpf_dsl::ResultExpr CrashSIGSYSPrctl();
+SANDBOX_EXPORT bpf_dsl::ResultExpr CrashSIGSYSIoctl();
+SANDBOX_EXPORT bpf_dsl::ResultExpr CrashSIGSYSKill();
+SANDBOX_EXPORT bpf_dsl::ResultExpr CrashSIGSYSFutex();
+SANDBOX_EXPORT bpf_dsl::ResultExpr RewriteSchedSIGSYS();
+
+// The following functions return substrings of the error messages printed by
+// the handlers above. They are useful in death tests.
+SANDBOX_EXPORT const char* GetErrorMessageContentForTests();
+SANDBOX_EXPORT const char* GetCloneErrorMessageContentForTests();
+SANDBOX_EXPORT const char* GetPrctlErrorMessageContentForTests();
+SANDBOX_EXPORT const char* GetIoctlErrorMessageContentForTests();
+SANDBOX_EXPORT const char* GetKillErrorMessageContentForTests();
+SANDBOX_EXPORT const char* GetFutexErrorMessageContentForTests();
+
+} // namespace sandbox.
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SIGSYS_HANDLERS_H_
diff --git a/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.cc b/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.cc
new file mode 100644
index 0000000000..58ffb843a8
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.cc
@@ -0,0 +1,319 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <fcntl.h>
+#include <linux/net.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/time/time.h"
+#include "build/build_config.h"
+#include "sandbox/linux/bpf_dsl/bpf_dsl.h"
+#include "sandbox/linux/bpf_dsl/seccomp_macros.h"
+#include "sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h"
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+#include "sandbox/linux/system_headers/linux_futex.h"
+#include "sandbox/linux/system_headers/linux_syscalls.h"
+#include "sandbox/linux/system_headers/linux_time.h"
+
+// PNaCl toolchain does not provide sys/ioctl.h header.
+#if !defined(OS_NACL_NONSFI)
+#include <sys/ioctl.h>
+#endif
+
+#if defined(OS_ANDROID)
+
+#if !defined(F_DUPFD_CLOEXEC)
+#define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6)
+#endif
+
+// https://android.googlesource.com/platform/bionic/+/lollipop-release/libc/private/bionic_prctl.h
+#if !defined(PR_SET_VMA)
+#define PR_SET_VMA 0x53564d41
+#endif
+
+// https://android.googlesource.com/platform/system/core/+/lollipop-release/libcutils/sched_policy.c
+#if !defined(PR_SET_TIMERSLACK_PID)
+#define PR_SET_TIMERSLACK_PID 41
+#endif
+
+#endif // defined(OS_ANDROID)
+
+#if defined(__arm__) && !defined(MAP_STACK)
+#define MAP_STACK 0x20000 // Daisy build environment has old headers.
+#endif
+
+#if defined(__mips__) && !defined(MAP_STACK)
+#define MAP_STACK 0x40000
+#endif
+namespace {
+
+// Reports whether this translation unit was compiled for x86-64.
+inline bool IsArchitectureX86_64() {
+#if defined(__x86_64__)
+  const bool kIsX86_64 = true;
+#else
+  const bool kIsX86_64 = false;
+#endif
+  return kIsX86_64;
+}
+
+// Reports whether this translation unit was compiled for 32-bit x86.
+inline bool IsArchitectureI386() {
+#if defined(__i386__)
+  const bool kIsI386 = true;
+#else
+  const bool kIsI386 = false;
+#endif
+  return kIsI386;
+}
+
+// Reports whether this translation unit was compiled with OS_ANDROID defined.
+inline bool IsAndroid() {
+#if defined(OS_ANDROID)
+  const bool kIsAndroid = true;
+#else
+  const bool kIsAndroid = false;
+#endif
+  return kIsAndroid;
+}
+
+// Reports whether this translation unit was compiled for MIPS.
+inline bool IsArchitectureMips() {
+#if defined(__mips__)
+  const bool kIsMips = true;
+#else
+  const bool kIsMips = false;
+#endif
+  return kIsMips;
+}
+
+} // namespace.
+
+#define CASES SANDBOX_BPF_DSL_CASES
+
+using sandbox::bpf_dsl::Allow;
+using sandbox::bpf_dsl::Arg;
+using sandbox::bpf_dsl::BoolExpr;
+using sandbox::bpf_dsl::Error;
+using sandbox::bpf_dsl::If;
+using sandbox::bpf_dsl::ResultExpr;
+
+namespace sandbox {
+
+#if !defined(OS_NACL_NONSFI)
+// Builds the clone() restriction:
+//  - the pthread creation flag sets used by Glibc (and, on Android builds,
+//    also the two Android ones) are allowed;
+//  - calls using neither CLONE_VM nor CLONE_THREAD, which includes all fork()
+//    implementations, fail with EPERM;
+//  - every other thread creation attempt crashes the process.
+ResultExpr RestrictCloneToThreadsAndEPERMFork() {
+  const Arg<unsigned long> flags(0);
+
+  // TODO(mdempsky): Extend DSL to support (flags & ~mask1) == mask2.
+  const uint64_t kSharedThreadFlags = CLONE_VM | CLONE_FS | CLONE_FILES |
+                                      CLONE_SIGHAND | CLONE_THREAD |
+                                      CLONE_SYSVSEM;
+  const uint64_t kAndroidCloneMask = kSharedThreadFlags;
+  const uint64_t kObsoleteAndroidCloneMask = kAndroidCloneMask | CLONE_DETACHED;
+  const uint64_t kGlibcPthreadFlags = kSharedThreadFlags | CLONE_SETTLS |
+                                      CLONE_PARENT_SETTID |
+                                      CLONE_CHILD_CLEARTID;
+
+  const BoolExpr is_glibc_pthread = flags == kGlibcPthreadFlags;
+  const BoolExpr is_android_pthread = flags == kAndroidCloneMask ||
+                                      flags == kObsoleteAndroidCloneMask ||
+                                      flags == kGlibcPthreadFlags;
+  const BoolExpr is_thread_creation =
+      IsAndroid() ? is_android_pthread : is_glibc_pthread;
+
+  const BoolExpr shares_nothing = (flags & (CLONE_VM | CLONE_THREAD)) == 0;
+
+  return If(is_thread_creation, Allow())
+      .ElseIf(shares_nothing, Error(EPERM))
+      .Else(CrashSIGSYSClone());
+}
+
+// Builds the prctl() restriction: PR_GET_NAME, PR_SET_NAME, PR_GET_DUMPABLE and
+// PR_SET_DUMPABLE are allowed (plus PR_SET_VMA and PR_SET_TIMERSLACK_PID on
+// Android); any other option crashes the process.
+ResultExpr RestrictPrctl() {
+  // Will need to add seccomp compositing in the future. PR_SET_PTRACER is
+  // used by breakpad but not needed anymore.
+  const Arg<int> prctl_option(0);
+  return Switch(prctl_option)
+      .CASES((PR_GET_NAME, PR_SET_NAME, PR_GET_DUMPABLE, PR_SET_DUMPABLE),
+             Allow())
+#if defined(OS_ANDROID)
+      .CASES((PR_SET_VMA, PR_SET_TIMERSLACK_PID), Allow())
+#endif
+      .Default(CrashSIGSYSPrctl());
+}
+
+// Builds the ioctl() restriction: only the TCGETS and FIONREAD requests are
+// allowed; any other request crashes the process.
+ResultExpr RestrictIoctl() {
+  const Arg<int> ioctl_request(1);
+  return Switch(ioctl_request)
+      .CASES((TCGETS, FIONREAD), Allow())
+      .Default(CrashSIGSYSIoctl());
+}
+
+// Builds the mmap() restriction: the call is allowed only if its flags
+// argument contains nothing outside the allowed set; otherwise the process is
+// crashed.
+ResultExpr RestrictMmapFlags() {
+  // This lists the permitted flags; it is negated below to obtain the mask of
+  // denied bits. Significantly, we don't permit MAP_HUGETLB, or the newer
+  // flags such as MAP_POPULATE.
+  // TODO(davidung), remove MAP_DENYWRITE with updated Tegra libraries.
+  const uint64_t kAllowedMask = MAP_SHARED | MAP_PRIVATE | MAP_ANONYMOUS |
+                                MAP_STACK | MAP_NORESERVE | MAP_FIXED |
+                                MAP_DENYWRITE;
+  const Arg<int> mmap_flags(3);
+  const BoolExpr no_denied_flag = (mmap_flags & ~kAllowedMask) == 0;
+  return If(no_denied_flag, Allow()).Else(CrashSIGSYS());
+}
+
+// Builds the mprotect() restriction: the call is allowed only if its prot
+// argument contains nothing but PROT_READ, PROT_WRITE and PROT_EXEC; otherwise
+// the process is crashed.
+ResultExpr RestrictMprotectFlags() {
+  // This lists the permitted bits; it is negated below to obtain the mask of
+  // denied bits. Significantly, we don't permit weird undocumented flags such
+  // as PROT_GROWSDOWN.
+  const uint64_t kAllowedMask = PROT_READ | PROT_WRITE | PROT_EXEC;
+  const Arg<int> protection(2);
+  const BoolExpr no_denied_bit = (protection & ~kAllowedMask) == 0;
+  return If(no_denied_bit, Allow()).Else(CrashSIGSYS());
+}
+
+// Builds the fcntl() restriction: a fixed set of commands is allowed outright,
+// F_SETFL is allowed only with flags from the allowed set, and everything else
+// crashes the process.
+ResultExpr RestrictFcntlCommands() {
+  // Glibc overrides the kernel's O_LARGEFILE value. Account for this by using
+  // the literal value on x86-64, i386 and Mips.
+  const bool use_literal_largefile =
+      IsArchitectureX86_64() || IsArchitectureI386() || IsArchitectureMips();
+  const uint64_t kOLargeFileFlag =
+      use_literal_largefile ? 0100000 : O_LARGEFILE;
+
+  // The F_SETFL flags are restricted as well: we don't want to permit flags
+  // with a history of trouble such as O_DIRECT. This lists the permitted
+  // flags; it is negated below to obtain the mask of denied bits.
+  const uint64_t kAllowedMask = O_ACCMODE | O_APPEND | O_NONBLOCK | O_SYNC |
+                                kOLargeFileFlag | O_CLOEXEC | O_NOATIME;
+
+  const Arg<int> cmd(1);
+  const Arg<long> long_arg(2);
+  const ResultExpr setfl_result =
+      If((long_arg & ~kAllowedMask) == 0, Allow()).Else(CrashSIGSYS());
+
+  return Switch(cmd)
+      .CASES((F_GETFL,
+              F_GETFD,
+              F_SETFD,
+              F_SETLK,
+              F_SETLKW,
+              F_GETLK,
+              F_DUPFD,
+              F_DUPFD_CLOEXEC),
+             Allow())
+      .Case(F_SETFL, setfl_result)
+      .Default(CrashSIGSYS());
+}
+
+#if defined(__i386__) || defined(__mips__)
+// Builds the socketcall() restriction: only the listed sub-calls are allowed;
+// every other sub-call fails with EPERM.
+ResultExpr RestrictSocketcallCommand() {
+  // Unfortunately, we are unable to restrict the first parameter to
+  // socketpair(2). Whilst initially sounding bad, it's noteworthy that very
+  // few protocols actually support socketpair(2). The scary call that we're
+  // worried about, socket(2), remains blocked.
+  const Arg<int> socketcall_number(0);
+  return Switch(socketcall_number)
+      .CASES((SYS_SOCKETPAIR,
+              SYS_SHUTDOWN,
+              SYS_RECV,
+              SYS_SEND,
+              SYS_RECVFROM,
+              SYS_SENDTO,
+              SYS_RECVMSG,
+              SYS_SENDMSG),
+             Allow())
+      .Default(Error(EPERM));
+}
+#endif  // defined(__i386__) || defined(__mips__)
+
+// Builds the restriction for the kill() family identified by |sysno|:
+// kill() and tgkill() are allowed only when their first argument equals
+// |target_pid| and crash otherwise; tkill() always crashes. Any other |sysno|
+// is a programming error.
+ResultExpr RestrictKillTarget(pid_t target_pid, int sysno) {
+  if (sysno == __NR_kill || sysno == __NR_tgkill) {
+    const Arg<pid_t> pid(0);
+    return If(pid == target_pid, Allow()).Else(CrashSIGSYSKill());
+  }
+  if (sysno == __NR_tkill)
+    return CrashSIGSYSKill();
+
+  NOTREACHED();
+  return CrashSIGSYS();
+}
+
+// Builds the futex() restriction: once the FUTEX_PRIVATE_FLAG and
+// FUTEX_CLOCK_REALTIME bits are masked out of the operation, only the listed
+// operations are allowed; any other operation crashes the process.
+ResultExpr RestrictFutex() {
+  const uint64_t kIgnoredOpBits = FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME;
+  const Arg<int> futex_op(1);
+  return Switch(futex_op & ~kIgnoredOpBits)
+      .CASES((FUTEX_WAIT,
+              FUTEX_WAKE,
+              FUTEX_REQUEUE,
+              FUTEX_CMP_REQUEUE,
+              FUTEX_WAKE_OP,
+              FUTEX_WAIT_BITSET,
+              FUTEX_WAKE_BITSET),
+             Allow())
+      .Default(CrashSIGSYSFutex());
+}
+
+// Builds the getpriority()/setpriority() restriction: with PRIO_PROCESS the
+// call is allowed when |who| is 0 or |target_pid| and fails with EPERM
+// otherwise; any other |which| crashes the process.
+ResultExpr RestrictGetSetpriority(pid_t target_pid) {
+  const Arg<int> which(0);
+  const Arg<int> who(1);
+
+  const BoolExpr targets_self = who == 0 || who == target_pid;
+  const ResultExpr process_scope_result =
+      If(targets_self, Allow()).Else(Error(EPERM));
+
+  return If(which == PRIO_PROCESS, process_scope_result).Else(CrashSIGSYS());
+}
+
+// Builds the restriction for the sched_*() call identified by |sysno|: the
+// call is allowed when its pid argument is 0 or |target_pid|; otherwise it is
+// handed to RewriteSchedSIGSYS(). Any other |sysno| is a programming error.
+ResultExpr RestrictSchedTarget(pid_t target_pid, int sysno) {
+  switch (sysno) {
+    case __NR_sched_getaffinity:
+    case __NR_sched_getattr:
+    case __NR_sched_getparam:
+    case __NR_sched_getscheduler:
+    case __NR_sched_rr_get_interval:
+    case __NR_sched_setaffinity:
+    case __NR_sched_setattr:
+    case __NR_sched_setparam:
+    case __NR_sched_setscheduler:
+      // Supported syscall: build the restriction below.
+      break;
+    default:
+      NOTREACHED();
+      return CrashSIGSYS();
+  }
+
+  const Arg<pid_t> pid(0);
+  const BoolExpr targets_self = pid == 0 || pid == target_pid;
+  return If(targets_self, Allow()).Else(RewriteSchedSIGSYS());
+}
+
+// Builds the prlimit64() restriction: allowed when the pid argument is 0 or
+// |target_pid|; any other pid crashes the process.
+ResultExpr RestrictPrlimit64(pid_t target_pid) {
+  const Arg<pid_t> pid(0);
+  const BoolExpr targets_self = pid == 0 || pid == target_pid;
+  return If(targets_self, Allow()).Else(CrashSIGSYS());
+}
+
+// Builds the getrusage() restriction: only RUSAGE_SELF is allowed; any other
+// |who| crashes the process.
+ResultExpr RestrictGetrusage() {
+  const Arg<int> who(0);
+  const BoolExpr is_self = who == RUSAGE_SELF;
+  return If(is_self, Allow()).Else(CrashSIGSYS());
+}
+#endif // !defined(OS_NACL_NONSFI)
+
+// Builds the restriction on the clock id argument (argument 0): only the
+// clocks listed below are allowed; any other clock id crashes the process.
+ResultExpr RestrictClockID() {
+  static_assert(sizeof(clockid_t) == 4, "clockid_t is not 32bit");
+  const Arg<clockid_t> clockid(0);
+  const BoolExpr is_allowed_clock =
+#if defined(OS_CHROMEOS)
+      // Allow the special clock for Chrome OS used by Chrome tracing.
+      clockid == base::TraceTicks::kClockSystemTrace ||
+#endif
+      clockid == CLOCK_MONOTONIC ||
+      clockid == CLOCK_MONOTONIC_COARSE ||
+      clockid == CLOCK_PROCESS_CPUTIME_ID ||
+      clockid == CLOCK_REALTIME ||
+      clockid == CLOCK_REALTIME_COARSE ||
+      clockid == CLOCK_THREAD_CPUTIME_ID;
+  return If(is_allowed_clock, Allow()).Else(CrashSIGSYS());
+}
+
+} // namespace sandbox.
diff --git a/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h b/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h
new file mode 100644
index 0000000000..9eb35d10e0
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h
@@ -0,0 +1,100 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SYSCALL_PARAMETERS_RESTRICTIONS_H_
+#define SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SYSCALL_PARAMETERS_RESTRICTIONS_H_
+
+#include <unistd.h>
+
+#include "build/build_config.h"
+#include "sandbox/linux/bpf_dsl/bpf_dsl_forward.h"
+#include "sandbox/sandbox_export.h"
+
+// These are helpers to build seccomp-bpf policies, i.e. policies for a
+// sandbox that reduces the Linux kernel's attack surface. They return a
+// bpf_dsl::ResultExpr suitable to restrict certain system call parameters.
+
+namespace sandbox {
+
+// Allow clone(2) for threads.
+// Reject fork(2) attempts with EPERM.
+// Don't restrict on ASAN.
+// Crash if anything else is attempted.
+SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictCloneToThreadsAndEPERMFork();
+
+// Allow PR_SET_NAME, PR_SET_DUMPABLE, PR_GET_DUMPABLE.
+// Crash if anything else is attempted.
+SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictPrctl();
+
+// Allow TCGETS and FIONREAD.
+// Crash if anything else is attempted.
+SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictIoctl();
+
+// Restrict the flags argument in mmap(2).
+// Only allow: MAP_SHARED | MAP_PRIVATE | MAP_ANONYMOUS |
+// MAP_STACK | MAP_NORESERVE | MAP_FIXED | MAP_DENYWRITE.
+// Crash if any other flag is used.
+SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictMmapFlags();
+
+// Restrict the prot argument in mprotect(2).
+// Only allow: PROT_READ | PROT_WRITE | PROT_EXEC.
+SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictMprotectFlags();
+
+// Restrict fcntl(2) cmd argument to:
+// We allow F_GETFL, F_SETFL, F_GETFD, F_SETFD, F_DUPFD, F_DUPFD_CLOEXEC,
+// F_SETLK, F_SETLKW and F_GETLK.
+// Also, in F_SETFL, restrict the allowed flags to: O_ACCMODE | O_APPEND |
+// O_NONBLOCK | O_SYNC | O_LARGEFILE | O_CLOEXEC | O_NOATIME.
+SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictFcntlCommands();
+
+#if defined(__i386__) || defined(__mips__)
+// Restrict socketcall(2) to only allow socketpair(2), send(2), recv(2),
+// sendto(2), recvfrom(2), shutdown(2), sendmsg(2) and recvmsg(2).
+SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictSocketcallCommand();
+#endif
+
+// Restrict |sysno| (which must be kill, tkill or tgkill) by allowing tgkill or
+// kill iff the first parameter is |target_pid|, crashing otherwise or if
+// |sysno| is tkill.
+SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictKillTarget(pid_t target_pid,
+ int sysno);
+
+// Crash if FUTEX_CMP_REQUEUE_PI is used in the second argument of futex(2).
+SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictFutex();
+
+// Crash if |which| is not PRIO_PROCESS. EPERM if |who| is neither 0 nor
+// |target_pid| while calling setpriority(2) / getpriority(2).
+SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictGetSetpriority(pid_t target_pid);
+
+// Restricts |pid| for sched_* syscalls which take a pid as the first argument.
+// We only allow calling these syscalls if the pid argument is equal to the pid
+// of the sandboxed process or 0 (indicating the current thread). The following
+// syscalls are supported:
+//
+// sched_getaffinity(), sched_getattr(), sched_getparam(), sched_getscheduler(),
+// sched_rr_get_interval(), sched_setaffinity(), sched_setattr(),
+// sched_setparam(), sched_setscheduler()
+SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictSchedTarget(pid_t target_pid,
+ int sysno);
+
+// Restricts the |pid| argument of prlimit64 to 0 (meaning the calling process)
+// or target_pid.
+SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictPrlimit64(pid_t target_pid);
+
+// Restricts the |who| argument of getrusage to RUSAGE_SELF (meaning the calling
+// process).
+SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictGetrusage();
+
+// Restrict |clk_id| for clock_getres(), clock_gettime() and clock_settime().
+// We allow accessing only CLOCK_MONOTONIC, CLOCK_MONOTONIC_COARSE,
+// CLOCK_PROCESS_CPUTIME_ID, CLOCK_REALTIME, CLOCK_REALTIME_COARSE, and
+// CLOCK_THREAD_CPUTIME_ID. In particular, this disallows
+// access to arbitrary per-{process,thread} CPU-time clock IDs (such as those
+// returned by {clock,pthread}_getcpuclockid), which can leak information
+// about the state of the host OS.
+// On Chrome OS, base::TraceTicks::kClockSystemTrace is also allowed.
+SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictClockID();
+
+} // namespace sandbox.
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SYSCALL_PARAMETERS_RESTRICTIONS_H_
diff --git a/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions_unittests.cc b/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions_unittests.cc
new file mode 100644
index 0000000000..aaed480d69
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions_unittests.cc
@@ -0,0 +1,282 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h"
+
+#include <errno.h>
+#include <sched.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "base/bind.h"
+#include "base/synchronization/waitable_event.h"
+#include "base/sys_info.h"
+#include "base/threading/thread.h"
+#include "base/time/time.h"
+#include "build/build_config.h"
+#include "sandbox/linux/bpf_dsl/bpf_dsl.h"
+#include "sandbox/linux/bpf_dsl/policy.h"
+#include "sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h"
+#include "sandbox/linux/seccomp-bpf/bpf_tests.h"
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+#include "sandbox/linux/seccomp-bpf/syscall.h"
+#include "sandbox/linux/services/syscall_wrappers.h"
+#include "sandbox/linux/system_headers/linux_syscalls.h"
+#include "sandbox/linux/system_headers/linux_time.h"
+#include "sandbox/linux/tests/unit_tests.h"
+
+#if !defined(OS_ANDROID)
+#include "third_party/lss/linux_syscall_support.h" // for MAKE_PROCESS_CPUCLOCK
+#endif
+
+namespace sandbox {
+
+namespace {
+
+// NOTE: most of the parameter restrictions are tested in
+// baseline_policy_unittest.cc as a more end-to-end test.
+
+using sandbox::bpf_dsl::Allow;
+using sandbox::bpf_dsl::ResultExpr;
+
+// Test policy: clock_gettime and clock_getres are filtered through
+// RestrictClockID(); every other syscall is allowed.
+class RestrictClockIdPolicy : public bpf_dsl::Policy {
+ public:
+  RestrictClockIdPolicy() {}
+  ~RestrictClockIdPolicy() override {}
+
+  ResultExpr EvaluateSyscall(int sysno) const override {
+    const bool is_restricted_clock_call =
+        sysno == __NR_clock_gettime || sysno == __NR_clock_getres;
+    if (is_restricted_clock_call)
+      return RestrictClockID();
+    return Allow();
+  }
+};
+
+// Asserts that both clock_getres() and clock_gettime() succeed for |clockid|.
+// |ts| is pre-filled with -1 before each call so that the non-negative
+// assertions afterwards only pass if the call overwrote the fields.
+void CheckClock(clockid_t clockid) {
+ struct timespec ts;
+ ts.tv_sec = -1;
+ ts.tv_nsec = -1;
+ BPF_ASSERT_EQ(0, clock_getres(clockid, &ts));
+ // The reported resolution is expected to be below one second.
+ BPF_ASSERT_EQ(0, ts.tv_sec);
+ BPF_ASSERT_LE(0, ts.tv_nsec);
+ // Reset the sentinel values before the second call.
+ ts.tv_sec = -1;
+ ts.tv_nsec = -1;
+ BPF_ASSERT_EQ(0, clock_gettime(clockid, &ts));
+ BPF_ASSERT_LE(0, ts.tv_sec);
+ BPF_ASSERT_LE(0, ts.tv_nsec);
+}
+
+// Under RestrictClockIdPolicy, clock_getres() and clock_gettime() must succeed
+// for each of the clock IDs checked below.
+BPF_TEST_C(ParameterRestrictions,
+ clock_gettime_allowed,
+ RestrictClockIdPolicy) {
+ CheckClock(CLOCK_MONOTONIC);
+ CheckClock(CLOCK_MONOTONIC_COARSE);
+ CheckClock(CLOCK_PROCESS_CPUTIME_ID);
+ CheckClock(CLOCK_REALTIME);
+ CheckClock(CLOCK_REALTIME_COARSE);
+ CheckClock(CLOCK_THREAD_CPUTIME_ID);
+}
+
+// Death test: calling clock_gettime() with CLOCK_MONOTONIC_RAW under
+// RestrictClockIdPolicy is expected to terminate the process with the message
+// returned by GetErrorMessageContentForTests().
+BPF_DEATH_TEST_C(ParameterRestrictions,
+ clock_gettime_crash_monotonic_raw,
+ DEATH_SEGV_MESSAGE(sandbox::GetErrorMessageContentForTests()),
+ RestrictClockIdPolicy) {
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
+}
+
+#if defined(OS_CHROMEOS)
+
+// A custom BPF tester delegate to run IsRunningOnChromeOS() before
+// the sandbox is enabled because we cannot run it with non-SFI BPF
+// sandbox enabled.
+class ClockSystemTesterDelegate : public sandbox::BPFTesterDelegate {
+ public:
+ // Caches the result of IsRunningOnChromeOS() at construction time so that
+ // RunTestFunction() does not have to call it.
+ ClockSystemTesterDelegate()
+ : is_running_on_chromeos_(base::SysInfo::IsRunningOnChromeOS()) {}
+ ~ClockSystemTesterDelegate() override {}
+
+ // Supplies the policy under test: a fresh RestrictClockIdPolicy.
+ scoped_ptr<sandbox::bpf_dsl::Policy> GetSandboxBPFPolicy() override {
+ return scoped_ptr<sandbox::bpf_dsl::Policy>(new RestrictClockIdPolicy());
+ }
+ // On Chrome OS the system trace clock must be fully usable; elsewhere the
+ // call is only made and its result is ignored.
+ void RunTestFunction() override {
+ if (is_running_on_chromeos_) {
+ CheckClock(base::TraceTicks::kClockSystemTrace);
+ } else {
+ struct timespec ts;
+ // kClockSystemTrace is 11, which is CLOCK_THREAD_CPUTIME_ID of
+ // the init process (pid=1). If kernel supports this feature,
+ // this may succeed even if this is not running on Chrome OS. We
+ // just check this clock_gettime call does not crash.
+ clock_gettime(base::TraceTicks::kClockSystemTrace, &ts);
+ }
+ }
+
+ private:
+ // Value of base::SysInfo::IsRunningOnChromeOS() captured in the constructor.
+ const bool is_running_on_chromeos_;
+ DISALLOW_COPY_AND_ASSIGN(ClockSystemTesterDelegate);
+};
+
+// Registers a BPF test driven by ClockSystemTesterDelegate.
+BPF_TEST_D(BPFTest, BPFTestWithDelegateClass, ClockSystemTesterDelegate);
+
+#elif defined(OS_LINUX)
+
+// Death test (Linux builds that are not Chrome OS): clock_gettime() on
+// base::TraceTicks::kClockSystemTrace under RestrictClockIdPolicy is expected
+// to terminate the process with the message returned by
+// GetErrorMessageContentForTests().
+BPF_DEATH_TEST_C(ParameterRestrictions,
+ clock_gettime_crash_system_trace,
+ DEATH_SEGV_MESSAGE(sandbox::GetErrorMessageContentForTests()),
+ RestrictClockIdPolicy) {
+ struct timespec ts;
+ clock_gettime(base::TraceTicks::kClockSystemTrace, &ts);
+}
+
+#endif // defined(OS_CHROMEOS)
+
+#if !defined(OS_ANDROID)
+// Death test: clock_gettime() on a CPU-time clock ID built for pid 1 is
+// expected to terminate the process under RestrictClockIdPolicy.
+BPF_DEATH_TEST_C(ParameterRestrictions,
+ clock_gettime_crash_cpu_clock,
+ DEATH_SEGV_MESSAGE(sandbox::GetErrorMessageContentForTests()),
+ RestrictClockIdPolicy) {
+ // We can't use clock_getcpuclockid() because it's not implemented in newlib,
+ // and it might not work inside the sandbox anyway.
+ const pid_t kInitPID = 1;
+ // Construct the clock ID by hand with the MAKE_PROCESS_CPUCLOCK macro.
+ const clockid_t kInitCPUClockID =
+ MAKE_PROCESS_CPUCLOCK(kInitPID, CPUCLOCK_SCHED);
+
+ struct timespec ts;
+ clock_gettime(kInitCPUClockID, &ts);
+}
+#endif // !defined(OS_ANDROID)
+
+// Test policy: sched_getparam is filtered through RestrictSchedTarget() with
+// the current pid as the target; every other syscall is allowed.
+class RestrictSchedPolicy : public bpf_dsl::Policy {
+ public:
+  RestrictSchedPolicy() {}
+  ~RestrictSchedPolicy() override {}
+
+  ResultExpr EvaluateSyscall(int sysno) const override {
+    if (sysno == __NR_sched_getparam)
+      return RestrictSchedTarget(getpid(), sysno);
+    return Allow();
+  }
+};
+
+// Asserts that sched_getparam(|pid|, |param|) succeeds (returns 0).
+void CheckSchedGetParam(pid_t pid, struct sched_param* param) {
+ BPF_ASSERT_EQ(0, sched_getparam(pid, param));
+}
+
+// Body of the sched_getparam_allowed test. It asserts that the calling
+// thread's tid differs from the pid, exercises sched_getparam() with the pid,
+// with 0 and with the tid, and finally signals |thread_run|.
+void SchedGetParamThread(base::WaitableEvent* thread_run) {
+ const pid_t pid = getpid();
+ const pid_t tid = sys_gettid();
+ // The checks below are only meaningful when pid and tid differ.
+ BPF_ASSERT_NE(pid, tid);
+
+ struct sched_param current_pid_param;
+ CheckSchedGetParam(pid, &current_pid_param);
+
+ struct sched_param zero_param;
+ CheckSchedGetParam(0, &zero_param);
+
+ struct sched_param tid_param;
+ CheckSchedGetParam(tid, &tid_param);
+
+ // Querying with 0 and with the tid must report the same priority.
+ BPF_ASSERT_EQ(zero_param.sched_priority, tid_param.sched_priority);
+
+ // Verify that the SIGSYS handler sets errno properly.
+ errno = 0;
+ BPF_ASSERT_EQ(-1, sched_getparam(tid, NULL));
+ BPF_ASSERT_EQ(EINVAL, errno);
+
+ // Tell the waiting test body that all checks have run.
+ thread_run->Signal();
+}
+
+// Runs SchedGetParamThread on a separate thread under RestrictSchedPolicy and
+// asserts that it signals completion within 5000 ms.
+BPF_TEST_C(ParameterRestrictions,
+ sched_getparam_allowed,
+ RestrictSchedPolicy) {
+ base::WaitableEvent thread_run(true, false);
+ // Run the actual test in a new thread so that the current pid and tid are
+ // different.
+ base::Thread getparam_thread("sched_getparam_thread");
+ BPF_ASSERT(getparam_thread.Start());
+ getparam_thread.message_loop()->PostTask(
+ FROM_HERE, base::Bind(&SchedGetParamThread, &thread_run));
+ BPF_ASSERT(thread_run.TimedWait(base::TimeDelta::FromMilliseconds(5000)));
+ getparam_thread.Stop();
+}
+
+// Death test: sched_getparam() targeting pid 1 under RestrictSchedPolicy is
+// expected to terminate the process with the message returned by
+// GetErrorMessageContentForTests().
+BPF_DEATH_TEST_C(ParameterRestrictions,
+ sched_getparam_crash_non_zero,
+ DEATH_SEGV_MESSAGE(sandbox::GetErrorMessageContentForTests()),
+ RestrictSchedPolicy) {
+ const pid_t kInitPID = 1;
+ struct sched_param param;
+ sched_getparam(kInitPID, &param);
+}
+
+// Test policy: prlimit64 is filtered through RestrictPrlimit64() with the
+// current pid as the target; every other syscall is allowed.
+class RestrictPrlimit64Policy : public bpf_dsl::Policy {
+ public:
+  RestrictPrlimit64Policy() {}
+  ~RestrictPrlimit64Policy() override {}
+
+  ResultExpr EvaluateSyscall(int sysno) const override {
+    if (sysno == __NR_prlimit64)
+      return RestrictPrlimit64(getpid());
+    return Allow();
+  }
+};
+
+// Under RestrictPrlimit64Policy, prlimit64 must succeed both with pid 0 and
+// with the process's own pid.
+BPF_TEST_C(ParameterRestrictions, prlimit64_allowed, RestrictPrlimit64Policy) {
+ BPF_ASSERT_EQ(0, sys_prlimit64(0, RLIMIT_AS, NULL, NULL));
+ BPF_ASSERT_EQ(0, sys_prlimit64(getpid(), RLIMIT_AS, NULL, NULL));
+}
+
+// Death test: prlimit64 targeting pid 1 under RestrictPrlimit64Policy is
+// expected to terminate the process with the message returned by
+// GetErrorMessageContentForTests().
+BPF_DEATH_TEST_C(ParameterRestrictions,
+ prlimit64_crash_not_self,
+ DEATH_SEGV_MESSAGE(sandbox::GetErrorMessageContentForTests()),
+ RestrictPrlimit64Policy) {
+ const pid_t kInitPID = 1;
+ // Guard against the test process itself being pid 1.
+ BPF_ASSERT_NE(kInitPID, getpid());
+ sys_prlimit64(kInitPID, RLIMIT_AS, NULL, NULL);
+}
+
+// Test policy: getrusage is filtered through RestrictGetrusage(); every other
+// syscall is allowed.
+class RestrictGetrusagePolicy : public bpf_dsl::Policy {
+ public:
+  RestrictGetrusagePolicy() {}
+  ~RestrictGetrusagePolicy() override {}
+
+  ResultExpr EvaluateSyscall(int sysno) const override {
+    if (sysno == __NR_getrusage)
+      return RestrictGetrusage();
+    return Allow();
+  }
+};
+
+// Under RestrictGetrusagePolicy, getrusage(RUSAGE_SELF) must succeed.
+BPF_TEST_C(ParameterRestrictions, getrusage_allowed, RestrictGetrusagePolicy) {
+ struct rusage usage;
+ BPF_ASSERT_EQ(0, getrusage(RUSAGE_SELF, &usage));
+}
+
+// Death test: getrusage(RUSAGE_CHILDREN) under RestrictGetrusagePolicy is
+// expected to terminate the process with the message returned by
+// GetErrorMessageContentForTests().
+BPF_DEATH_TEST_C(ParameterRestrictions,
+ getrusage_crash_not_self,
+ DEATH_SEGV_MESSAGE(sandbox::GetErrorMessageContentForTests()),
+ RestrictGetrusagePolicy) {
+ struct rusage usage;
+ getrusage(RUSAGE_CHILDREN, &usage);
+}
+
+} // namespace
+
+} // namespace sandbox
diff --git a/sandbox/linux/seccomp-bpf-helpers/syscall_sets.cc b/sandbox/linux/seccomp-bpf-helpers/syscall_sets.cc
new file mode 100644
index 0000000000..c217d47e2d
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf-helpers/syscall_sets.cc
@@ -0,0 +1,1060 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf-helpers/syscall_sets.h"
+
+#include "build/build_config.h"
+#include "sandbox/linux/system_headers/linux_syscalls.h"
+
+namespace sandbox {
+
+// The functions below cover all existing i386, x86_64, and ARM system calls;
+// excluding syscalls made obsolete in ARM EABI.
+// The implicitly defined sets form a partition of the sets of
+// system calls.
+
+// True for the signal-sending syscalls: kill, tgkill and tkill.
+bool SyscallSets::IsKill(int sysno) {
+  return sysno == __NR_kill || sysno == __NR_tgkill ||
+         sysno == __NR_tkill;  // tkill is deprecated.
+}
+
+// Returns true only for gettimeofday (plus time on i386, x86_64 and MIPS).
+// The remaining time-related syscalls are enumerated explicitly, each with a
+// note, but share the `return false` of the default case.
+bool SyscallSets::IsAllowedGettime(int sysno) {
+ switch (sysno) {
+ case __NR_gettimeofday:
+#if defined(__i386__) || defined(__x86_64__) || defined(__mips__)
+ case __NR_time:
+#endif
+ return true;
+ case __NR_adjtimex: // Privileged.
+ case __NR_clock_adjtime: // Privileged.
+ case __NR_clock_getres: // Could be allowed.
+ case __NR_clock_gettime:
+ case __NR_clock_nanosleep: // Could be allowed.
+ case __NR_clock_settime: // Privileged.
+#if defined(__i386__) || defined(__mips__)
+ case __NR_ftime: // Obsolete.
+#endif
+ case __NR_settimeofday: // Privileged.
+#if defined(__i386__) || defined(__mips__)
+ case __NR_stime:
+#endif
+ default:
+ return false;
+ }
+}
+
+// True for the syscalls that read or change the current working directory:
+// getcwd, chdir and fchdir.
+bool SyscallSets::IsCurrentDirectory(int sysno) {
+  return sysno == __NR_getcwd || sysno == __NR_chdir ||
+         sysno == __NR_fchdir;
+}
+
+// True only for the umask syscall.
+bool SyscallSets::IsUmask(int sysno) {
+  return sysno == __NR_umask;
+}
+
+// System calls that directly access the file system. They might acquire
+// a new file descriptor or otherwise perform an operation directly
+// via a path.
+// Both EPERM and ENOENT are valid errno unless otherwise noted in comment.
+//
+// Returns true for every syscall listed below and false for anything else.
+// The first group is compiled out on AArch64; several later entries are
+// guarded by per-architecture conditionals.
+bool SyscallSets::IsFileSystem(int sysno) {
+ switch (sysno) {
+#if !defined(__aarch64__)
+ case __NR_access: // EPERM not a valid errno.
+ case __NR_chmod:
+ case __NR_chown:
+#if defined(__i386__) || defined(__arm__)
+ case __NR_chown32:
+#endif
+ case __NR_creat:
+ case __NR_futimesat: // Should be called utimesat ?
+ case __NR_lchown:
+ case __NR_link:
+ case __NR_lstat: // EPERM not a valid errno.
+ case __NR_mkdir:
+ case __NR_mknod:
+ case __NR_open:
+ case __NR_readlink: // EPERM not a valid errno.
+ case __NR_rename:
+ case __NR_rmdir:
+ case __NR_stat: // EPERM not a valid errno.
+ case __NR_symlink:
+ case __NR_unlink:
+ case __NR_uselib: // Neither EPERM, nor ENOENT are valid errno.
+ case __NR_ustat: // Same as above. Deprecated.
+ case __NR_utimes:
+#endif // !defined(__aarch64__)
+
+ case __NR_execve:
+ case __NR_faccessat: // EPERM not a valid errno.
+ case __NR_fchmodat:
+ case __NR_fchownat: // Should be called chownat ?
+#if defined(__x86_64__) || defined(__aarch64__)
+ case __NR_newfstatat: // fstatat(). EPERM not a valid errno.
+#elif defined(__i386__) || defined(__arm__) || defined(__mips__)
+ case __NR_fstatat64:
+#endif
+#if defined(__i386__) || defined(__arm__)
+ case __NR_lchown32:
+#endif
+ case __NR_linkat:
+ case __NR_lookup_dcookie: // ENOENT not a valid errno.
+
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ case __NR_lstat64:
+#endif
+#if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
+ case __NR_memfd_create:
+#endif
+ case __NR_mkdirat:
+ case __NR_mknodat:
+#if defined(__i386__)
+ case __NR_oldlstat:
+ case __NR_oldstat:
+#endif
+ case __NR_openat:
+ case __NR_readlinkat:
+ case __NR_renameat:
+ case __NR_renameat2:
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ case __NR_stat64:
+#endif
+ case __NR_statfs: // EPERM not a valid errno.
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ case __NR_statfs64:
+#endif
+ case __NR_symlinkat:
+ case __NR_truncate:
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ case __NR_truncate64:
+#endif
+ case __NR_unlinkat:
+#if defined(__i386__) || defined(__x86_64__) || defined(__mips__)
+ case __NR_utime:
+#endif
+ case __NR_utimensat: // New.
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Returns true only for fstat (plus fstat64 on i386, ARM and MIPS). The other
+// fd-based file system syscalls enumerated after the first `return true` fall
+// through to the default case and return false.
+bool SyscallSets::IsAllowedFileSystemAccessViaFd(int sysno) {
+ switch (sysno) {
+ case __NR_fstat:
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ case __NR_fstat64:
+#endif
+ return true;
+// TODO(jln): these should be denied gracefully as well (moved below).
+#if defined(__i386__) || defined(__x86_64__) || defined(__mips__)
+ case __NR_fadvise64: // EPERM not a valid errno.
+#endif
+#if defined(__i386__)
+ case __NR_fadvise64_64:
+#endif
+#if defined(__arm__)
+ case __NR_arm_fadvise64_64:
+#endif
+ case __NR_fdatasync: // EPERM not a valid errno.
+ case __NR_flock: // EPERM not a valid errno.
+ case __NR_fstatfs: // Give information about the whole filesystem.
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ case __NR_fstatfs64:
+#endif
+ case __NR_fsync: // EPERM not a valid errno.
+#if defined(__i386__)
+ case __NR_oldfstat:
+#endif
+#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \
+ defined(__aarch64__)
+ case __NR_sync_file_range: // EPERM not a valid errno.
+#elif defined(__arm__)
+ case __NR_arm_sync_file_range: // EPERM not a valid errno.
+#endif
+ default:
+ return false;
+ }
+}
+
+// EPERM is a good errno for any of these.
+//
+// Returns true for the fd-based syscalls listed below (some are only present
+// on certain architectures) and false for everything else.
+bool SyscallSets::IsDeniedFileSystemAccessViaFd(int sysno) {
+ switch (sysno) {
+ case __NR_fallocate:
+ case __NR_fchmod:
+ case __NR_fchown:
+ case __NR_ftruncate:
+#if defined(__i386__) || defined(__arm__)
+ case __NR_fchown32:
+#endif
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ case __NR_ftruncate64:
+#endif
+#if !defined(__aarch64__)
+ case __NR_getdents: // EPERM not a valid errno.
+#endif
+ case __NR_getdents64: // EPERM not a valid errno.
+#if defined(__i386__) || defined(__mips__)
+ case __NR_readdir:
+#endif
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Returns true for the identity-query syscalls listed below (the *32 variants
+// exist only on i386 and ARM) and false for everything else.
+bool SyscallSets::IsGetSimpleId(int sysno) {
+ switch (sysno) {
+ case __NR_capget:
+ case __NR_getegid:
+ case __NR_geteuid:
+ case __NR_getgid:
+ case __NR_getgroups:
+ case __NR_getpid:
+ case __NR_getppid:
+ case __NR_getresgid:
+ case __NR_getsid:
+ case __NR_gettid:
+ case __NR_getuid:
+ case __NR_getresuid:
+#if defined(__i386__) || defined(__arm__)
+ case __NR_getegid32:
+ case __NR_geteuid32:
+ case __NR_getgid32:
+ case __NR_getgroups32:
+ case __NR_getresgid32:
+ case __NR_getresuid32:
+ case __NR_getuid32:
+#endif
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Returns true for the capability, I/O-privilege and uid/gid-setting syscalls
+// listed below (ioperm/iopl only on i386 and x86_64, the *32 variants only on
+// i386 and ARM) and false for everything else.
+bool SyscallSets::IsProcessPrivilegeChange(int sysno) {
+ switch (sysno) {
+ case __NR_capset:
+#if defined(__i386__) || defined(__x86_64__)
+ case __NR_ioperm: // Intel privilege.
+ case __NR_iopl: // Intel privilege.
+#endif
+ case __NR_setfsgid:
+ case __NR_setfsuid:
+ case __NR_setgid:
+ case __NR_setgroups:
+ case __NR_setregid:
+ case __NR_setresgid:
+ case __NR_setresuid:
+ case __NR_setreuid:
+ case __NR_setuid:
+#if defined(__i386__) || defined(__arm__)
+ case __NR_setfsgid32:
+ case __NR_setfsuid32:
+ case __NR_setgid32:
+ case __NR_setgroups32:
+ case __NR_setregid32:
+ case __NR_setresgid32:
+ case __NR_setresuid32:
+ case __NR_setreuid32:
+ case __NR_setuid32:
+#endif
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Returns true for setpgid, setsid, getpgid and (except on AArch64) getpgrp;
+// false for everything else.
+bool SyscallSets::IsProcessGroupOrSession(int sysno) {
+ switch (sysno) {
+ case __NR_setpgid:
+#if !defined(__aarch64__)
+ case __NR_getpgrp:
+#endif
+ case __NR_setsid:
+ case __NR_getpgid:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Returns true for rt_sigaction, rt_sigprocmask and rt_sigreturn (plus the
+// non-rt sigaction, sigprocmask and sigreturn on i386, ARM and MIPS). The other
+// signal-related syscalls are enumerated explicitly but return false together
+// with the default case.
+bool SyscallSets::IsAllowedSignalHandling(int sysno) {
+ switch (sysno) {
+ case __NR_rt_sigaction:
+ case __NR_rt_sigprocmask:
+ case __NR_rt_sigreturn:
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ case __NR_sigaction:
+ case __NR_sigprocmask:
+ case __NR_sigreturn:
+#endif
+ return true;
+ case __NR_rt_sigpending:
+ case __NR_rt_sigqueueinfo:
+ case __NR_rt_sigsuspend:
+ case __NR_rt_sigtimedwait:
+ case __NR_rt_tgsigqueueinfo:
+ case __NR_sigaltstack:
+#if !defined(__aarch64__)
+ case __NR_signalfd:
+#endif
+ case __NR_signalfd4:
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ case __NR_sigpending:
+ case __NR_sigsuspend:
+#endif
+#if defined(__i386__) || defined(__mips__)
+ case __NR_signal:
+ case __NR_sgetmask: // Obsolete.
+ case __NR_ssetmask:
+#endif
+ default:
+ return false;
+ }
+}
+
+// Returns true for close, dup, dup3, dup2 (except on AArch64) and shutdown
+// (where it exists as a direct syscall). fcntl and fcntl64 are listed
+// explicitly but return false together with the default case.
+bool SyscallSets::IsAllowedOperationOnFd(int sysno) {
+ switch (sysno) {
+ case __NR_close:
+ case __NR_dup:
+#if !defined(__aarch64__)
+ case __NR_dup2:
+#endif
+ case __NR_dup3:
+#if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \
+ defined(__aarch64__)
+ case __NR_shutdown:
+#endif
+ return true;
+ case __NR_fcntl:
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ case __NR_fcntl64:
+#endif
+ default:
+ return false;
+ }
+}
+
+// Returns true for restart_syscall and, on ARM, for __ARM_NR_cmpxchg; false
+// for everything else.
+bool SyscallSets::IsKernelInternalApi(int sysno) {
+ switch (sysno) {
+ case __NR_restart_syscall:
+#if defined(__arm__)
+ case __ARM_NR_cmpxchg:
+#endif
+ return true;
+ default:
+ return false;
+ }
+}
+
+// This should be thought through in conjunction with IsFutex().
+//
+// Returns true for exit, exit_group, wait4, waitid and (on i386) waitpid. The
+// process/thread-creation syscalls enumerated after the `return true` are
+// listed explicitly but return false together with the default case.
+bool SyscallSets::IsAllowedProcessStartOrDeath(int sysno) {
+ switch (sysno) {
+ case __NR_exit:
+ case __NR_exit_group:
+ case __NR_wait4:
+ case __NR_waitid:
+#if defined(__i386__)
+ case __NR_waitpid:
+#endif
+ return true;
+ case __NR_clone: // Should be parameter-restricted.
+ case __NR_setns: // Privileged.
+#if !defined(__aarch64__)
+ case __NR_fork:
+#endif
+#if defined(__i386__) || defined(__x86_64__)
+ case __NR_get_thread_area:
+#endif
+#if defined(__i386__) || defined(__x86_64__) || defined(__mips__)
+ case __NR_set_thread_area:
+#endif
+ case __NR_set_tid_address:
+ case __NR_unshare:
+#if !defined(__mips__) && !defined(__aarch64__)
+ case __NR_vfork:
+#endif
+ default:
+ return false;
+ }
+}
+
+// It's difficult to restrict those, but there is attack surface here.
+//
+// Always returns false: the three futex-related syscalls are listed
+// explicitly, but they share the `return false` of the default case.
+bool SyscallSets::IsAllowedFutex(int sysno) {
+ switch (sysno) {
+ case __NR_get_robust_list:
+ case __NR_set_robust_list:
+ case __NR_futex:
+ default:
+ return false;
+ }
+}
+
+// Returns true for epoll_create1, epoll_ctl and (except on AArch64)
+// epoll_create and epoll_wait. epoll_pwait and the x86_64-only *_old variants
+// return false, as does any other syscall.
+bool SyscallSets::IsAllowedEpoll(int sysno) {
+ switch (sysno) {
+#if !defined(__aarch64__)
+ case __NR_epoll_create:
+ case __NR_epoll_wait:
+#endif
+ case __NR_epoll_create1:
+ case __NR_epoll_ctl:
+ return true;
+ // `default` is placed ahead of the remaining labels; they all share the
+ // single `return false` below.
+ default:
+#if defined(__x86_64__)
+ case __NR_epoll_ctl_old:
+#endif
+ case __NR_epoll_pwait:
+#if defined(__x86_64__)
+ case __NR_epoll_wait_old:
+#endif
+ return false;
+ }
+}
+
+// Returns true for pipe2 and (except on AArch64) pipe. socketpair is listed
+// explicitly where it exists as a direct syscall, but returns false together
+// with the default case.
+bool SyscallSets::IsAllowedGetOrModifySocket(int sysno) {
+ switch (sysno) {
+#if !defined(__aarch64__)
+ case __NR_pipe:
+#endif
+ case __NR_pipe2:
+ return true;
+ default:
+#if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \
+ defined(__aarch64__)
+ case __NR_socketpair: // We will want to inspect its argument.
+#endif
+ return false;
+ }
+}
+
+// On x86_64, ARM, MIPS and AArch64, returns true for accept, accept4, bind,
+// connect, socket and listen. On any other architecture the whole case list is
+// compiled out and the function always returns false.
+bool SyscallSets::IsDeniedGetOrModifySocket(int sysno) {
+ switch (sysno) {
+#if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \
+ defined(__aarch64__)
+ case __NR_accept:
+ case __NR_accept4:
+ case __NR_bind:
+ case __NR_connect:
+ case __NR_socket:
+ case __NR_listen:
+ return true;
+#endif
+ default:
+ return false;
+ }
+}
+
+#if defined(__i386__) || defined(__mips__)
+// Big multiplexing system call for sockets.
+// True only for socketcall.
+bool SyscallSets::IsSocketCall(int sysno) {
+  return sysno == __NR_socketcall;
+}
+#endif
+
+#if defined(__x86_64__) || defined(__arm__) || defined(__mips__)
+// True for the syscalls that query or set socket metadata: getpeername,
+// getsockname, getsockopt and setsockopt.
+bool SyscallSets::IsNetworkSocketInformation(int sysno) {
+  return sysno == __NR_getpeername || sysno == __NR_getsockname ||
+         sysno == __NR_getsockopt || sysno == __NR_setsockopt;
+}
+#endif
+
+// Returns true only for brk, mlock, munlock and munmap. The other
+// address-space syscalls are enumerated explicitly but return false together
+// with the default case.
+bool SyscallSets::IsAllowedAddressSpaceAccess(int sysno) {
+ switch (sysno) {
+ case __NR_brk:
+ case __NR_mlock:
+ case __NR_munlock:
+ case __NR_munmap:
+ return true;
+ case __NR_madvise:
+ case __NR_mincore:
+ case __NR_mlockall:
+#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \
+ defined(__aarch64__)
+ case __NR_mmap:
+#endif
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ case __NR_mmap2:
+#endif
+#if defined(__i386__) || defined(__x86_64__) || defined(__mips__)
+ case __NR_modify_ldt:
+#endif
+ case __NR_mprotect:
+ case __NR_mremap:
+ case __NR_msync:
+ case __NR_munlockall:
+ case __NR_readahead:
+ case __NR_remap_file_pages:
+#if defined(__i386__)
+ case __NR_vm86:
+ case __NR_vm86old:
+#endif
+ default:
+ return false;
+ }
+}
+
+// Returns true for the basic read/write, seek, poll/select and socket
+// send/receive syscalls listed before the `return true` (several of them only
+// on specific architectures). The syscalls enumerated after it, starting with
+// ioctl, return false together with the default case.
+bool SyscallSets::IsAllowedGeneralIo(int sysno) {
+ switch (sysno) {
+ case __NR_lseek:
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ case __NR__llseek:
+#endif
+#if !defined(__aarch64__)
+ case __NR_poll:
+#endif
+ case __NR_ppoll:
+ case __NR_pselect6:
+ case __NR_read:
+ case __NR_readv:
+#if defined(__arm__) || defined(__mips__)
+ case __NR_recv:
+#endif
+#if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \
+ defined(__aarch64__)
+ case __NR_recvfrom: // Could specify source.
+ case __NR_recvmsg: // Could specify source.
+#endif
+#if defined(__i386__) || defined(__x86_64__)
+ case __NR_select:
+#endif
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ case __NR__newselect:
+#endif
+#if defined(__arm__)
+ case __NR_send:
+#endif
+#if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \
+ defined(__aarch64__)
+ case __NR_sendmsg: // Could specify destination.
+ case __NR_sendto: // Could specify destination.
+#endif
+ case __NR_write:
+ case __NR_writev:
+ return true;
+ case __NR_ioctl: // Can be very powerful.
+ case __NR_pread64:
+ case __NR_preadv:
+ case __NR_pwrite64:
+ case __NR_pwritev:
+ case __NR_recvmmsg: // Could specify source.
+ case __NR_sendfile:
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ case __NR_sendfile64:
+#endif
+ case __NR_sendmmsg: // Could specify destination.
+ case __NR_splice:
+ case __NR_tee:
+ case __NR_vmsplice:
+ default:
+ return false;
+ }
+}
+
+// Returns true for prctl and, on x86_64, arch_prctl; false for everything
+// else.
+bool SyscallSets::IsPrctl(int sysno) {
+ switch (sysno) {
+#if defined(__x86_64__)
+ case __NR_arch_prctl:
+#endif
+ case __NR_prctl:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// True only for the seccomp syscall.
+bool SyscallSets::IsSeccomp(int sysno) {
+  return sysno == __NR_seccomp;
+}
+
+// Returns true for sched_yield, nanosleep and (except on AArch64) pause.
+// getpriority, setpriority and nice are listed explicitly but return false
+// together with the default case.
+bool SyscallSets::IsAllowedBasicScheduler(int sysno) {
+ switch (sysno) {
+ case __NR_sched_yield:
+#if !defined(__aarch64__)
+ case __NR_pause:
+#endif
+ case __NR_nanosleep:
+ return true;
+ case __NR_getpriority:
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ case __NR_nice:
+#endif
+ case __NR_setpriority:
+ default:
+ return false;
+ }
+}
+
+// Returns true for kexec_load, reboot, setdomainname, sethostname, syslog and
+// (on i386, ARM and MIPS) bdflush; false for everything else.
+bool SyscallSets::IsAdminOperation(int sysno) {
+ switch (sysno) {
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+ case __NR_bdflush:
+#endif
+ case __NR_kexec_load:
+ case __NR_reboot:
+ case __NR_setdomainname:
+ case __NR_sethostname:
+ case __NR_syslog:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Returns true for delete_module, init_module, finit_module and (on i386,
+// x86_64 and MIPS) create_module, get_kernel_syms and query_module; false for
+// everything else.
+bool SyscallSets::IsKernelModule(int sysno) {
+ switch (sysno) {
+#if defined(__i386__) || defined(__x86_64__) || defined(__mips__)
+ case __NR_create_module:
+ case __NR_get_kernel_syms: // Should ENOSYS.
+ case __NR_query_module:
+#endif
+ case __NR_delete_module:
+ case __NR_init_module:
+ case __NR_finit_module:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// True for pivot_root, chroot and sync.
+bool SyscallSets::IsGlobalFSViewChange(int sysno) {
+  return sysno == __NR_pivot_root || sysno == __NR_chroot ||
+         sysno == __NR_sync;
+}
+
+// Returns true for mount, nfsservctl, quotactl, swapoff, swapon, umount2 and
+// (on i386 and MIPS) umount; false for everything else.
+bool SyscallSets::IsFsControl(int sysno) {
+ switch (sysno) {
+ case __NR_mount:
+ case __NR_nfsservctl:
+ case __NR_quotactl:
+ case __NR_swapoff:
+ case __NR_swapon:
+#if defined(__i386__) || defined(__mips__)
+ case __NR_umount:
+#endif
+ case __NR_umount2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Returns true for get_mempolicy, getcpu, mbind, move_pages, set_mempolicy and
+// (on every architecture handled here except ARM) migrate_pages; false for
+// everything else.
+bool SyscallSets::IsNuma(int sysno) {
+ switch (sysno) {
+ case __NR_get_mempolicy:
+ case __NR_getcpu:
+ case __NR_mbind:
+#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \
+ defined(__aarch64__)
+ case __NR_migrate_pages:
+#endif
+ case __NR_move_pages:
+ case __NR_set_mempolicy:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// True for the mq_* message queue syscalls.
+bool SyscallSets::IsMessageQueue(int sysno) {
+  return sysno == __NR_mq_getsetattr || sysno == __NR_mq_notify ||
+         sysno == __NR_mq_open || sysno == __NR_mq_timedreceive ||
+         sysno == __NR_mq_timedsend || sysno == __NR_mq_unlink;
+}
+
+// Returns true for acct, getrusage, personality, prlimit64, setrlimit, times
+// and the architecture-specific rlimit getters (getrlimit, ugetrlimit, ulimit);
+// false for everything else.
+bool SyscallSets::IsGlobalProcessEnvironment(int sysno) {
+ switch (sysno) {
+ case __NR_acct: // Privileged.
+#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \
+ defined(__aarch64__)
+ case __NR_getrlimit:
+#endif
+#if defined(__i386__) || defined(__arm__)
+ case __NR_ugetrlimit:
+#endif
+#if defined(__i386__) || defined(__mips__)
+ case __NR_ulimit:
+#endif
+ case __NR_getrusage:
+ case __NR_personality: // Can change its personality as well.
+ case __NR_prlimit64: // Like setrlimit / getrlimit.
+ case __NR_setrlimit:
+ case __NR_times:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// True for ptrace, process_vm_readv, process_vm_writev and kcmp.
+bool SyscallSets::IsDebug(int sysno) {
+  return sysno == __NR_ptrace || sysno == __NR_process_vm_readv ||
+         sysno == __NR_process_vm_writev || sysno == __NR_kcmp;
+}
+
+// Returns true for sysinfo, uname, _sysctl and sysfs (the latter two except on
+// AArch64) and, on i386, olduname and oldolduname; false for everything else.
+bool SyscallSets::IsGlobalSystemStatus(int sysno) {
+ switch (sysno) {
+#if !defined(__aarch64__)
+ case __NR__sysctl:
+ case __NR_sysfs:
+#endif
+ case __NR_sysinfo:
+ case __NR_uname:
+#if defined(__i386__)
+ case __NR_olduname:
+ case __NR_oldolduname:
+#endif
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Returns true for eventfd2 and (except on AArch64) eventfd; false for
+// everything else.
+bool SyscallSets::IsEventFd(int sysno) {
+ switch (sysno) {
+#if !defined(__aarch64__)
+ case __NR_eventfd:
+#endif
+ case __NR_eventfd2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Asynchronous I/O API.
+// True for io_cancel, io_destroy, io_getevents, io_setup and io_submit.
+bool SyscallSets::IsAsyncIo(int sysno) {
+  return sysno == __NR_io_cancel || sysno == __NR_io_destroy ||
+         sysno == __NR_io_getevents || sysno == __NR_io_setup ||
+         sysno == __NR_io_submit;
+}
+
+// True for add_key, keyctl and request_key.
+bool SyscallSets::IsKeyManagement(int sysno) {
+  return sysno == __NR_add_key || sysno == __NR_keyctl ||
+         sysno == __NR_request_key;
+}
+
+#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
+// True for semctl, semget, semop and semtimedop.
+bool SyscallSets::IsSystemVSemaphores(int sysno) {
+  return sysno == __NR_semctl || sysno == __NR_semget ||
+         sysno == __NR_semop || sysno == __NR_semtimedop;
+}
+#endif
+
+#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
+// These give a lot of ambient authority and bypass the setuid sandbox.
+// Returns true if |sysno| is one of the dedicated shm* system calls.
+bool SyscallSets::IsSystemVSharedMemory(int sysno) {
+  return sysno == __NR_shmat || sysno == __NR_shmctl ||
+         sysno == __NR_shmdt || sysno == __NR_shmget;
+}
+#endif
+
+#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
+// Returns true if |sysno| is one of the dedicated msg* system calls.
+bool SyscallSets::IsSystemVMessageQueue(int sysno) {
+  return sysno == __NR_msgctl || sysno == __NR_msgget ||
+         sysno == __NR_msgrcv || sysno == __NR_msgsnd;
+}
+#endif
+
+#if defined(__i386__) || defined(__mips__)
+// Big system V multiplexing system call: returns true only for ipc.
+bool SyscallSets::IsSystemVIpc(int sysno) {
+  return sysno == __NR_ipc;
+}
+#endif
+
+bool SyscallSets::IsAnySystemV(int sysno) {
+#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
+ return IsSystemVMessageQueue(sysno) || IsSystemVSemaphores(sysno) ||
+ IsSystemVSharedMemory(sysno);
+#elif defined(__i386__) || defined(__mips__)
+ return IsSystemVIpc(sysno);
+#endif
+}
+
+// Returns true if |sysno| is one of the ioprio_* or sched_* system calls
+// listed in the table below.
+bool SyscallSets::IsAdvancedScheduler(int sysno) {
+  static const int kAdvancedSchedulerSyscalls[] = {
+      __NR_ioprio_get,  // IO scheduler.
+      __NR_ioprio_set,
+      __NR_sched_get_priority_max,
+      __NR_sched_get_priority_min,
+      __NR_sched_getaffinity,
+      __NR_sched_getattr,
+      __NR_sched_getparam,
+      __NR_sched_getscheduler,
+      __NR_sched_rr_get_interval,
+      __NR_sched_setaffinity,
+      __NR_sched_setattr,
+      __NR_sched_setparam,
+      __NR_sched_setscheduler,
+  };
+  for (int candidate : kAdvancedSchedulerSyscalls) {
+    if (candidate == sysno)
+      return true;
+  }
+  return false;
+}
+
+// Returns true if |sysno| is one of the inotify_* system calls. The legacy
+// inotify_init is only checked on architectures other than aarch64.
+bool SyscallSets::IsInotify(int sysno) {
+#if !defined(__aarch64__)
+  if (sysno == __NR_inotify_init)
+    return true;
+#endif
+  return sysno == __NR_inotify_add_watch || sysno == __NR_inotify_init1 ||
+         sysno == __NR_inotify_rm_watch;
+}
+
+// Returns true if |sysno| is fanotify_init or fanotify_mark.
+bool SyscallSets::IsFaNotify(int sysno) {
+  return sysno == __NR_fanotify_init || sysno == __NR_fanotify_mark;
+}
+
+// Returns true if |sysno| is getitimer or setitimer, or alarm on the
+// architectures guarded below.
+bool SyscallSets::IsTimer(int sysno) {
+#if defined(__i386__) || defined(__x86_64__) || defined(__mips__)
+  if (sysno == __NR_alarm)
+    return true;
+#endif
+  return sysno == __NR_getitimer || sysno == __NR_setitimer;
+}
+
+// Returns true if |sysno| is one of the timer_* or timerfd_* system calls
+// listed in the table below.
+bool SyscallSets::IsAdvancedTimer(int sysno) {
+  static const int kAdvancedTimerSyscalls[] = {
+      __NR_timer_create,    __NR_timer_delete,    __NR_timer_getoverrun,
+      __NR_timer_gettime,   __NR_timer_settime,   __NR_timerfd_create,
+      __NR_timerfd_gettime, __NR_timerfd_settime,
+  };
+  for (int candidate : kAdvancedTimerSyscalls) {
+    if (candidate == sysno)
+      return true;
+  }
+  return false;
+}
+
+// Returns true if |sysno| is one of the *xattr system calls (get, set, list
+// and remove, in their path, l- and f- variants).
+bool SyscallSets::IsExtendedAttributes(int sysno) {
+  static const int kXattrSyscalls[] = {
+      __NR_fgetxattr,    __NR_flistxattr, __NR_fremovexattr,
+      __NR_fsetxattr,    __NR_getxattr,   __NR_lgetxattr,
+      __NR_listxattr,    __NR_llistxattr, __NR_lremovexattr,
+      __NR_lsetxattr,    __NR_removexattr, __NR_setxattr,
+  };
+  for (int candidate : kXattrSyscalls) {
+    if (candidate == sysno)
+      return true;
+  }
+  return false;
+}
+
+// Various system calls that need to be researched.
+// TODO(jln): classify this better.
+//
+// The case labels are grouped by the preprocessor condition under which the
+// corresponding __NR_* constant is checked.
+bool SyscallSets::IsMisc(int sysno) {
+  switch (sysno) {
+#if !defined(__mips__)
+    case __NR_getrandom:
+#endif
+    case __NR_name_to_handle_at:
+    case __NR_open_by_handle_at:
+    case __NR_perf_event_open:
+    case __NR_syncfs:
+    case __NR_vhangup:
+// The system calls below are not implemented.
+#if defined(__i386__) || defined(__x86_64__) || defined(__mips__)
+    case __NR_afs_syscall:
+    case __NR_getpmsg:
+    case __NR_putpmsg:
+#endif
+#if defined(__i386__) || defined(__mips__)
+    case __NR_break:
+    case __NR_gtty:
+    case __NR_idle:
+    case __NR_lock:
+    case __NR_mpx:
+    case __NR_prof:
+    case __NR_profil:
+    case __NR_stty:
+#endif
+#if defined(__x86_64__)
+    case __NR_security:
+    case __NR_tuxcall:
+#endif
+#if !defined(__aarch64__)
+    case __NR_vserver:
+#endif
+      return true;
+    default:
+      return false;
+  }
+}
+
+#if defined(__arm__)
+// Returns true if |sysno| is one of the pciconfig_* system calls.
+bool SyscallSets::IsArmPciConfig(int sysno) {
+  return sysno == __NR_pciconfig_iobase || sysno == __NR_pciconfig_read ||
+         sysno == __NR_pciconfig_write;
+}
+
+// Returns true if |sysno| is one of the __ARM_NR_* system calls listed below.
+bool SyscallSets::IsArmPrivate(int sysno) {
+  return sysno == __ARM_NR_breakpoint || sysno == __ARM_NR_cacheflush ||
+         sysno == __ARM_NR_set_tls || sysno == __ARM_NR_usr26 ||
+         sysno == __ARM_NR_usr32;
+}
+#endif // defined(__arm__)
+
+#if defined(__mips__)
+// Returns true if |sysno| is cacheflush or cachectl.
+bool SyscallSets::IsMipsPrivate(int sysno) {
+  return sysno == __NR_cacheflush || sysno == __NR_cachectl;
+}
+
+// Returns true if |sysno| is sysmips or unused150.
+bool SyscallSets::IsMipsMisc(int sysno) {
+  return sysno == __NR_sysmips || sysno == __NR_unused150;
+}
+#endif // defined(__mips__)
+} // namespace sandbox.
diff --git a/sandbox/linux/seccomp-bpf-helpers/syscall_sets.h b/sandbox/linux/seccomp-bpf-helpers/syscall_sets.h
new file mode 100644
index 0000000000..5ba6335a95
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf-helpers/syscall_sets.h
@@ -0,0 +1,112 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SYSCALL_SETS_H_
+#define SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SYSCALL_SETS_H_
+
+#include "base/macros.h"
+#include "build/build_config.h"
+#include "sandbox/sandbox_export.h"
+
+// These are helpers to build seccomp-bpf policies, i.e. policies for a
+// sandbox that reduces the Linux kernel's attack surface. Given their
+// nature, they don't have any clear semantics and are completely
+// "implementation-defined".
+
+namespace sandbox {
+
+// Static-only collection of predicates. Each one takes a system call number
+// and reports whether it belongs to the named group. Several predicates are
+// only declared on the architectures selected by the guards below.
+class SANDBOX_EXPORT SyscallSets {
+ public:
+  static bool IsKill(int sysno);
+  static bool IsAllowedGettime(int sysno);
+  static bool IsCurrentDirectory(int sysno);
+  static bool IsUmask(int sysno);
+
+  // System calls that directly access the file system. They might acquire
+  // a new file descriptor or otherwise perform an operation directly
+  // via a path.
+  static bool IsFileSystem(int sysno);
+  static bool IsAllowedFileSystemAccessViaFd(int sysno);
+  static bool IsDeniedFileSystemAccessViaFd(int sysno);
+
+  static bool IsGetSimpleId(int sysno);
+  static bool IsProcessPrivilegeChange(int sysno);
+  static bool IsProcessGroupOrSession(int sysno);
+  static bool IsAllowedSignalHandling(int sysno);
+  static bool IsAllowedOperationOnFd(int sysno);
+  static bool IsKernelInternalApi(int sysno);
+
+  // This should be thought through in conjunction with IsFutex().
+  static bool IsAllowedProcessStartOrDeath(int sysno);
+
+  // It's difficult to restrict those, but there is attack surface here.
+  static bool IsAllowedFutex(int sysno);
+  static bool IsAllowedEpoll(int sysno);
+  static bool IsAllowedGetOrModifySocket(int sysno);
+  static bool IsDeniedGetOrModifySocket(int sysno);
+
+#if defined(__i386__) || defined(__mips__)
+  // Big multiplexing system call for sockets.
+  static bool IsSocketCall(int sysno);
+#endif
+
+#if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \
+    defined(__aarch64__)
+  static bool IsNetworkSocketInformation(int sysno);
+#endif
+
+  static bool IsAllowedAddressSpaceAccess(int sysno);
+  static bool IsAllowedGeneralIo(int sysno);
+  static bool IsPrctl(int sysno);
+  static bool IsSeccomp(int sysno);
+  static bool IsAllowedBasicScheduler(int sysno);
+  static bool IsAdminOperation(int sysno);
+  static bool IsKernelModule(int sysno);
+  static bool IsGlobalFSViewChange(int sysno);
+  static bool IsFsControl(int sysno);
+  static bool IsNuma(int sysno);
+  static bool IsMessageQueue(int sysno);
+  static bool IsGlobalProcessEnvironment(int sysno);
+  static bool IsDebug(int sysno);
+  static bool IsGlobalSystemStatus(int sysno);
+  static bool IsEventFd(int sysno);
+  // Asynchronous I/O API.
+  static bool IsAsyncIo(int sysno);
+  static bool IsKeyManagement(int sysno);
+
+  // System V IPC. Architectures either expose the individual sem*/shm*/msg*
+  // system calls or a single multiplexed ipc call.
+#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
+  static bool IsSystemVSemaphores(int sysno);
+  // These give a lot of ambient authority and bypass the setuid sandbox.
+  static bool IsSystemVSharedMemory(int sysno);
+  static bool IsSystemVMessageQueue(int sysno);
+#endif
+
+#if defined(__i386__) || defined(__mips__)
+  // Big system V multiplexing system call.
+  static bool IsSystemVIpc(int sysno);
+#endif
+
+  static bool IsAnySystemV(int sysno);
+  static bool IsAdvancedScheduler(int sysno);
+  static bool IsInotify(int sysno);
+  static bool IsFaNotify(int sysno);
+  static bool IsTimer(int sysno);
+  static bool IsAdvancedTimer(int sysno);
+  static bool IsExtendedAttributes(int sysno);
+  static bool IsMisc(int sysno);
+
+#if defined(__arm__)
+  static bool IsArmPciConfig(int sysno);
+  static bool IsArmPrivate(int sysno);
+#endif  // defined(__arm__)
+
+#if defined(__mips__)
+  static bool IsMipsPrivate(int sysno);
+  static bool IsMipsMisc(int sysno);
+#endif  // defined(__mips__)
+
+ private:
+  DISALLOW_IMPLICIT_CONSTRUCTORS(SyscallSets);
+};
+
+} // namespace sandbox.
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SYSCALL_SETS_H_
diff --git a/sandbox/linux/seccomp-bpf/bpf_tests_unittest.cc b/sandbox/linux/seccomp-bpf/bpf_tests_unittest.cc
new file mode 100644
index 0000000000..63e1814c90
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/bpf_tests_unittest.cc
@@ -0,0 +1,153 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf/bpf_tests.h"
+
+#include <errno.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "base/logging.h"
+#include "base/memory/scoped_ptr.h"
+#include "build/build_config.h"
+#include "sandbox/linux/bpf_dsl/bpf_dsl.h"
+#include "sandbox/linux/bpf_dsl/policy.h"
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+#include "sandbox/linux/services/syscall_wrappers.h"
+#include "sandbox/linux/system_headers/linux_syscalls.h"
+#include "sandbox/linux/tests/unit_tests.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using sandbox::bpf_dsl::Allow;
+using sandbox::bpf_dsl::Error;
+using sandbox::bpf_dsl::ResultExpr;
+
+namespace sandbox {
+
+namespace {
+
+class FourtyTwo {  // Auxiliary object type used by the BPF_AUX test below.
+ public:
+ static const int kMagicValue = 42;
+ FourtyTwo() : value_(kMagicValue) {}
+ int value() { return value_; }  // Always kMagicValue; set once in the ctor.
+
+ private:
+ int value_;
+ DISALLOW_COPY_AND_ASSIGN(FourtyTwo);
+};
+
+class EmptyClassTakingPolicy : public bpf_dsl::Policy {  // Allows everything.
+ public:
+ explicit EmptyClassTakingPolicy(FourtyTwo* fourty_two) {
+ BPF_ASSERT(fourty_two);  // The auxiliary object must have been provided.
+ BPF_ASSERT(FourtyTwo::kMagicValue == fourty_two->value());
+ }
+ ~EmptyClassTakingPolicy() override {}
+
+ ResultExpr EvaluateSyscall(int sysno) const override {
+ DCHECK(SandboxBPF::IsValidSyscallNumber(sysno));
+ return Allow();  // Every system call is permitted by this policy.
+ }
+};
+
+BPF_TEST(BPFTest,
+ BPFAUXPointsToClass,
+ EmptyClassTakingPolicy,
+ FourtyTwo /* *BPF_AUX */) {
+ // Inside the test body BPF_AUX should point to an instance of FourtyTwo.
+ BPF_ASSERT(BPF_AUX);
+ BPF_ASSERT(FourtyTwo::kMagicValue == BPF_AUX->value());
+}
+
+// Intentionally empty test body; it exists only to be handed to
+// BPFTesterCompatibilityDelegate in the leak test below.
+void DummyTestFunction(FourtyTwo* /* fourty_two */) {}
+
+TEST(BPFTest, BPFTesterCompatibilityDelegateLeakTest) {
+ // Only constructs and destroys delegates; this gives dynamic analysis tools
+ // an opportunity to detect leaks.
+ {
+ BPFTesterCompatibilityDelegate<EmptyClassTakingPolicy, FourtyTwo>
+ simple_delegate(DummyTestFunction);
+ }
+ {
+ // Test polymorphism: destroy the delegate through a base-class pointer.
+ scoped_ptr<BPFTesterDelegate> simple_delegate(
+ new BPFTesterCompatibilityDelegate<EmptyClassTakingPolicy, FourtyTwo>(
+ DummyTestFunction));
+ }
+}
+
+// Policy that makes ptrace fail with ENOSYS and allows every other system
+// call. It also records the pid of the creating process and asserts that it is
+// evaluated and destroyed in that same process.
+class EnosysPtracePolicy : public bpf_dsl::Policy {
+ public:
+  EnosysPtracePolicy() : my_pid_(sys_getpid()) {}
+
+  ~EnosysPtracePolicy() override {
+    // Policies should be able to bind with the process on which they are
+    // created. They should never be created in a parent process.
+    BPF_ASSERT_EQ(my_pid_, sys_getpid());
+  }
+
+  ResultExpr EvaluateSyscall(int system_call_number) const override {
+    CHECK(SandboxBPF::IsValidSyscallNumber(system_call_number));
+    if (system_call_number != __NR_ptrace)
+      return Allow();
+
+    // The EvaluateSyscall function should run in the process that created
+    // the current object.
+    BPF_ASSERT_EQ(my_pid_, sys_getpid());
+    return Error(ENOSYS);
+  }
+
+ private:
+  pid_t my_pid_;
+
+  DISALLOW_COPY_AND_ASSIGN(EnosysPtracePolicy);
+};
+
+class BasicBPFTesterDelegate : public BPFTesterDelegate {
+ public:
+ BasicBPFTesterDelegate() {}
+ ~BasicBPFTesterDelegate() override {}
+
+ scoped_ptr<bpf_dsl::Policy> GetSandboxBPFPolicy() override {
+ return scoped_ptr<bpf_dsl::Policy>(new EnosysPtracePolicy());
+ }
+ void RunTestFunction() override {
+ errno = 0;  // Clear errno so the ENOSYS check below is meaningful.
+ int ret = ptrace(PTRACE_TRACEME, -1, NULL, NULL);
+ BPF_ASSERT(-1 == ret);  // EnosysPtracePolicy makes ptrace fail...
+ BPF_ASSERT(ENOSYS == errno);  // ...with ENOSYS.
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(BasicBPFTesterDelegate);
+};
+
+// This is the most powerful and complex way to create a BPF test, but it
+// requires a full class definition (BasicBPFTesterDelegate).
+BPF_TEST_D(BPFTest, BPFTestWithDelegateClass, BasicBPFTesterDelegate);
+
+// This is the simplest form of BPF test: a policy class plus an inline body.
+BPF_TEST_C(BPFTest, BPFTestWithInlineTest, EnosysPtracePolicy) {
+ errno = 0;  // Clear errno so the ENOSYS check below is meaningful.
+ int ret = ptrace(PTRACE_TRACEME, -1, NULL, NULL);
+ BPF_ASSERT(-1 == ret);  // EnosysPtracePolicy makes ptrace fail...
+ BPF_ASSERT(ENOSYS == errno);  // ...with ENOSYS.
+}
+
+const char kHelloMessage[] = "Hello";
+
+BPF_DEATH_TEST_C(BPFTest,
+ BPFDeathTestWithInlineTest,
+ DEATH_MESSAGE(kHelloMessage),
+ EnosysPtracePolicy) {
+ LOG(ERROR) << kHelloMessage;  // Same text as passed to DEATH_MESSAGE above.
+ _exit(1);  // Terminate with a non-zero exit status.
+}
+
+} // namespace
+
+} // namespace sandbox
diff --git a/sandbox/linux/seccomp-bpf/die.cc b/sandbox/linux/seccomp-bpf/die.cc
new file mode 100644
index 0000000000..3baf1f13d9
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/die.cc
@@ -0,0 +1,93 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf/die.h"
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "base/logging.h"
+#include "base/posix/eintr_wrapper.h"
+#include "sandbox/linux/seccomp-bpf/syscall.h"
+#include "sandbox/linux/services/syscall_wrappers.h"
+#include "sandbox/linux/system_headers/linux_signal.h"
+
+namespace sandbox {
+
+void Die::ExitGroup() {
+ // exit_group() should exit our program. After all, it is defined as a
+ // function that doesn't return. But things can theoretically go wrong.
+ // Especially, since we are dealing with system call filters. Continuing
+ // execution would be very bad in most cases where ExitGroup() gets called.
+ // So, we'll try a few other strategies too.
+ Syscall::Call(__NR_exit_group, 1);
+
+ // We have no idea what our run-time environment looks like. So, signal
+ // handlers might or might not do the right thing. Try to reset settings
+ // to a defined state; but we have no way to verify whether we actually
+ // succeeded in doing so. Nonetheless, triggering a fatal signal could help
+ // us terminate.
+ struct sigaction sa = {};
+ sa.sa_handler = LINUX_SIG_DFL;
+ sa.sa_flags = LINUX_SA_RESTART;
+ sys_sigaction(LINUX_SIGSEGV, &sa, nullptr);
+ Syscall::Call(__NR_prctl, PR_SET_DUMPABLE, (void*)0, (void*)0, (void*)0);
+ if (*(volatile char*)0) {  // Deliberate null read to trigger a fatal signal.
+ }
+
+ // If there is no way for us to ask for the program to exit, the next
+ // best thing we can do is to loop indefinitely. Maybe, somebody will notice
+ // and file a bug...
+ // We in fact retry the system call inside of our loop so that it will
+ // stand out when somebody tries to diagnose the problem by using "strace".
+ for (;;) {
+ Syscall::Call(__NR_exit_group, 1);
+ }
+}
+
+// Reports |msg| (attributed to |file|:|line|) and terminates the process.
+// When simple exit is enabled the message goes straight to stderr; otherwise
+// it is emitted as a LOG_FATAL message.
+void Die::SandboxDie(const char* msg, const char* file, int line) {
+  if (!simple_exit_) {
+    logging::LogMessage(file, line, logging::LOG_FATAL).stream() << msg;
+  } else {
+    LogToStderr(msg, file, line);
+  }
+  ExitGroup();
+}
+
+// Logs |msg| through RAW_LOG(FATAL) and terminates the process. A null |msg|
+// is logged as an empty string.
+void Die::RawSandboxDie(const char* msg) {
+  RAW_LOG(FATAL, msg ? msg : "");
+  ExitGroup();
+}
+
+// Emits |msg| as a LOG_INFO message attributed to |file|:|line|, unless
+// informational messages have been suppressed.
+void Die::SandboxInfo(const char* msg, const char* file, int line) {
+  if (suppress_info_)
+    return;
+  logging::LogMessage(file, line, logging::LOG_INFO).stream() << msg;
+}
+
+// Writes "<file>:<line>:<msg>\n" to file descriptor 2 with a single write
+// system call. Does nothing when |msg| is null. A null |file| is treated as an
+// empty file name.
+void Die::LogToStderr(const char* msg, const char* file, int line) {
+  if (!msg)
+    return;
+
+  char line_str[40];
+  snprintf(line_str, sizeof(line_str), "%d", line);
+  // Constructing a std::string from a null pointer is undefined behaviour, so
+  // substitute an empty name rather than crash while reporting an error.
+  std::string s =
+      std::string(file ? file : "") + ":" + line_str + ":" + msg + "\n";
+
+  // No need to loop. Short write()s are unlikely and if they happen we
+  // probably prefer them over a loop that blocks.
+  ignore_result(
+      HANDLE_EINTR(Syscall::Call(__NR_write, 2, s.c_str(), s.length())));
+}
+
+bool Die::simple_exit_ = false;
+bool Die::suppress_info_ = false;
+
+} // namespace sandbox
diff --git a/sandbox/linux/seccomp-bpf/die.h b/sandbox/linux/seccomp-bpf/die.h
new file mode 100644
index 0000000000..b3f3f72c2f
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/die.h
@@ -0,0 +1,68 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_DIE_H__
+#define SANDBOX_LINUX_SECCOMP_BPF_DIE_H__
+
+#include "base/macros.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+
+// This is the main API for using this file. Prints an error message and
+// exits with a fatal error. This is not async-signal safe.
+#define SANDBOX_DIE(m) sandbox::Die::SandboxDie(m, __FILE__, __LINE__)
+
+// An async signal safe version of the same API. Won't print the filename
+// and line numbers.
+#define RAW_SANDBOX_DIE(m) sandbox::Die::RawSandboxDie(m)
+
+// Adds an informational message to the log file or stderr as appropriate.
+#define SANDBOX_INFO(m) sandbox::Die::SandboxInfo(m, __FILE__, __LINE__)
+
+class SANDBOX_EXPORT Die {
+ public:
+ // Terminate the program, even if the current sandbox policy prevents some
+ // of the more commonly used functions used for exiting.
+ // Most users would want to call SANDBOX_DIE() instead, as it logs extra
+ // information. But calling ExitGroup() is correct and in some rare cases
+ // preferable. So, we make it part of the public API.
+ static void ExitGroup() __attribute__((noreturn));
+
+ // This method gets called by SANDBOX_DIE(). There is normally no reason
+ // to call it directly unless you are defining your own exiting macro.
+ static void SandboxDie(const char* msg, const char* file, int line)
+ __attribute__((noreturn));
+ // This method gets called by RAW_SANDBOX_DIE(); it logs |msg| only.
+ static void RawSandboxDie(const char* msg) __attribute__((noreturn));
+
+ // This method gets called by SANDBOX_INFO(). There is normally no reason
+ // to call it directly unless you are defining your own logging macro.
+ static void SandboxInfo(const char* msg, const char* file, int line);
+
+ // Writes a message to stderr. Used as a fall-back choice, if we don't have
+ // any other way to report an error.
+ static void LogToStderr(const char* msg, const char* file, int line);
+
+ // We generally want to run all exit handlers. This means, on SANDBOX_DIE()
+ // we should be calling LOG(FATAL). But there are some situations where
+ // we just need to print a message and then terminate. This would typically
+ // happen in cases where we consume the error message internally (e.g. in
+ // unit tests or in the SupportsSeccompSandbox() method).
+ static void EnableSimpleExit() { simple_exit_ = true; }
+
+ // Sometimes we need to disable all informational messages (e.g. from within
+ // unittests).
+ static void SuppressInfoMessages(bool flag) { suppress_info_ = flag; }
+
+ private:
+ static bool simple_exit_;  // When true, SandboxDie() uses LogToStderr().
+ static bool suppress_info_;  // When true, SandboxInfo() logs nothing.
+
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Die);
+};
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_DIE_H__
diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
new file mode 100644
index 0000000000..3e4f0adf53
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc
@@ -0,0 +1,279 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+
+// Some headers on Android are missing cdefs: crbug.com/172337.
+// (We can't use OS_ANDROID here since build_config.h is not included).
+#if defined(ANDROID)
+#include <sys/cdefs.h>
+#endif
+
+#include <errno.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "base/compiler_specific.h"
+#include "base/files/scoped_file.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/posix/eintr_wrapper.h"
+#include "sandbox/linux/bpf_dsl/codegen.h"
+#include "sandbox/linux/bpf_dsl/policy.h"
+#include "sandbox/linux/bpf_dsl/policy_compiler.h"
+#include "sandbox/linux/bpf_dsl/seccomp_macros.h"
+#include "sandbox/linux/bpf_dsl/syscall_set.h"
+#include "sandbox/linux/seccomp-bpf/die.h"
+#include "sandbox/linux/seccomp-bpf/syscall.h"
+#include "sandbox/linux/seccomp-bpf/trap.h"
+#include "sandbox/linux/services/proc_util.h"
+#include "sandbox/linux/services/syscall_wrappers.h"
+#include "sandbox/linux/services/thread_helpers.h"
+#include "sandbox/linux/system_headers/linux_filter.h"
+#include "sandbox/linux/system_headers/linux_seccomp.h"
+#include "sandbox/linux/system_headers/linux_syscalls.h"
+#include "third_party/valgrind/valgrind.h"
+
+namespace sandbox {
+
+namespace {
+
+// Wraps the RUNNING_ON_VALGRIND macro in a function returning bool.
+bool IsRunningOnValgrind() {
+  return RUNNING_ON_VALGRIND;
+}
+
+// Forwards to ThreadHelpers::IsSingleThreaded() with the given |proc_fd|.
+bool IsSingleThreaded(int proc_fd) {
+  const bool single_threaded = ThreadHelpers::IsSingleThreaded(proc_fd);
+  return single_threaded;
+}
+
+// Check if the kernel supports seccomp-filter (a.k.a. seccomp mode 2) via
+// prctl(). The probe passes a null filter pointer and treats a failure with
+// EFAULT as proof of support.
+bool KernelSupportsSeccompBPF() {
+  errno = 0;
+  const int rv = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, nullptr);
+  return rv == -1 && errno == EFAULT;
+}
+
+// LG introduced a buggy syscall, sys_set_media_ext, with the same number as
+// seccomp. Return true if the current kernel has this buggy syscall.
+//
+// We want this to work with upcoming versions of seccomp, so we pass bogus
+// flags that are unlikely to ever be used by the kernel. A normal kernel would
+// return -EINVAL, but a buggy LG kernel would return 1.
+//
+// The probe is only compiled when OS_ANDROID is defined; otherwise this
+// always returns false.
+bool KernelHasLGBug() {
+#if defined(OS_ANDROID)
+  // sys_set_media will see this as NULL, which should be a safe (non-crashing)
+  // way to invoke it. A genuine seccomp syscall will see it as
+  // SECCOMP_SET_MODE_STRICT.
+  const unsigned int operation = 0;
+  // Chosen by fair dice roll. Guaranteed to be random.
+  const unsigned int flags = 0xf7a46a5c;
+  const int rv = sys_seccomp(operation, flags, nullptr);
+  // A genuine kernel would return -EINVAL (which would set rv to -1 and errno
+  // to EINVAL), or at the very least return some kind of error (which would
+  // set rv to -1). Any other behavior indicates that whatever code received
+  // our syscall was not the real seccomp.
+  return rv != -1;
+#else
+  return false;
+#endif  // defined(OS_ANDROID)
+}
+
+// Check if the kernel supports seccomp-filter via the seccomp system call
+// and the TSYNC feature to enable seccomp on all threads. The probe passes a
+// null filter pointer and treats a failure with EFAULT as proof of support.
+bool KernelSupportsSeccompTsync() {
+  if (KernelHasLGBug())
+    return false;
+
+  errno = 0;
+  const int rv =
+      sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, nullptr);
+  if (rv == -1 && errno == EFAULT)
+    return true;
+
+  // Any other outcome must be a failure with ENOSYS or EINVAL.
+  // TODO(jln): turn these into DCHECK after 417888 is considered fixed.
+  CHECK_EQ(-1, rv);
+  CHECK(ENOSYS == errno || EINVAL == errno);
+  return false;
+}
+
+// Issues system call number -1 through Syscall::Call(). Returns 0 when that
+// fails with ENOSYS; otherwise returns the call's result widened to 64 bits.
+uint64_t EscapePC() {
+  const intptr_t rv = Syscall::Call(-1);
+  const bool not_implemented = (rv == -1 && errno == ENOSYS);
+  if (not_implemented)
+    return 0;
+  const uintptr_t raw = static_cast<uintptr_t>(rv);
+  return static_cast<uint64_t>(raw);
+}
+
+} // namespace
+
+// Stores |policy| in policy_; the sandbox starts out not started and with a
+// default-constructed /proc descriptor.
+SandboxBPF::SandboxBPF(bpf_dsl::Policy* policy)
+    : proc_fd_(), sandbox_has_started_(false), policy_(policy) {}
+
+// Nothing to do explicitly; members clean up after themselves.
+SandboxBPF::~SandboxBPF() {}
+
+// static
+// Reports whether the running kernel supports the requested seccomp |level|.
+bool SandboxBPF::SupportsSeccompSandbox(SeccompLevel level) {
+  // Never pretend to support seccomp with Valgrind, as it
+  // throws the tool off.
+  if (IsRunningOnValgrind())
+    return false;
+
+  if (level == SeccompLevel::SINGLE_THREADED)
+    return KernelSupportsSeccompBPF();
+  if (level == SeccompLevel::MULTI_THREADED)
+    return KernelSupportsSeccompTsync();
+
+  NOTREACHED();
+  return false;
+}
+
+bool SandboxBPF::StartSandbox(SeccompLevel seccomp_level) {
+ DCHECK(policy_);
+ CHECK(seccomp_level == SeccompLevel::SINGLE_THREADED ||
+ seccomp_level == SeccompLevel::MULTI_THREADED);
+
+ if (sandbox_has_started_) {
+ SANDBOX_DIE(
+ "Cannot repeatedly start sandbox. Create a separate Sandbox "
+ "object instead.");
+ return false;  // Not reached: SANDBOX_DIE() does not return.
+ }
+
+ if (!proc_fd_.is_valid()) {
+ SetProcFd(ProcUtil::OpenProc());  // No descriptor was supplied; open one.
+ }
+
+ const bool supports_tsync = KernelSupportsSeccompTsync();
+
+ if (seccomp_level == SeccompLevel::SINGLE_THREADED) {
+ // Wait for /proc/self/task/ to update if needed and assert the
+ // process is single threaded.
+ ThreadHelpers::AssertSingleThreaded(proc_fd_.get());
+ } else if (seccomp_level == SeccompLevel::MULTI_THREADED) {
+ if (IsSingleThreaded(proc_fd_.get())) {
+ SANDBOX_DIE("Cannot start sandbox; "
+ "process may be single-threaded when reported as not");
+ return false;  // Not reached: SANDBOX_DIE() does not return.
+ }
+ if (!supports_tsync) {
+ SANDBOX_DIE("Cannot start sandbox; kernel does not support synchronizing "
+ "filters for a threadgroup");
+ return false;  // Not reached: SANDBOX_DIE() does not return.
+ }
+ }
+
+ // We no longer need access to any files in /proc. We want to do this
+ // before installing the filters, just in case that our policy denies
+ // close().
+ if (proc_fd_.is_valid()) {
+ proc_fd_.reset();
+ }
+
+ // Install the filters. Thread synchronization is requested whenever the
+ // kernel supports TSYNC, even for SINGLE_THREADED.
+ InstallFilter(supports_tsync ||
+ seccomp_level == SeccompLevel::MULTI_THREADED);
+
+ return true;
+}
+
+void SandboxBPF::SetProcFd(base::ScopedFD proc_fd) {
+ proc_fd_.swap(proc_fd);  // The previous descriptor ends up in |proc_fd|.
+}
+
+// static
+bool SandboxBPF::IsValidSyscallNumber(int sysnum) {
+ return SyscallSet::IsValid(sysnum);  // Thin forwarding wrapper.
+}
+
+// static
+bool SandboxBPF::IsRequiredForUnsafeTrap(int sysno) {
+ return bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap(sysno);  // Forwards.
+}
+
+// static
+// Re-issues the system call described by |args| (number plus six arguments)
+// through Syscall::Call() and returns its result.
+intptr_t SandboxBPF::ForwardSyscall(const struct arch_seccomp_data& args) {
+  const intptr_t arg0 = static_cast<intptr_t>(args.args[0]);
+  const intptr_t arg1 = static_cast<intptr_t>(args.args[1]);
+  const intptr_t arg2 = static_cast<intptr_t>(args.args[2]);
+  const intptr_t arg3 = static_cast<intptr_t>(args.args[3]);
+  const intptr_t arg4 = static_cast<intptr_t>(args.args[4]);
+  const intptr_t arg5 = static_cast<intptr_t>(args.args[5]);
+  return Syscall::Call(args.nr, arg0, arg1, arg2, arg3, arg4, arg5);
+}
+
+// Compiles policy_ into a BPF program. Verification is always forced when
+// NDEBUG is not defined, regardless of |force_verification|.
+scoped_ptr<CodeGen::Program> SandboxBPF::AssembleFilter(
+    bool force_verification) {
+  DCHECK(policy_);
+#if !defined(NDEBUG)
+  force_verification = true;
+#endif
+
+  bpf_dsl::PolicyCompiler compiler(policy_.get(), Trap::Registry());
+  const bool debugging_allowed = Trap::SandboxDebuggingAllowedByUser();
+  if (debugging_allowed) {
+    compiler.DangerousSetEscapePC(EscapePC());
+  }
+  return compiler.Compile(force_verification);
+}
+
+// Compiles the policy and installs the resulting BPF program in the kernel.
+// When |must_sync_threads| is true the seccomp() system call is used with
+// SECCOMP_FILTER_FLAG_TSYNC; otherwise prctl(PR_SET_SECCOMP) is used. Any
+// failure terminates the process via SANDBOX_DIE().
+void SandboxBPF::InstallFilter(bool must_sync_threads) {
+  // We want to be very careful in not imposing any requirements on the
+  // policies that are set with SetSandboxPolicy(). This means, as soon as
+  // the sandbox is active, we shouldn't be relying on libraries that could
+  // be making system calls. This, for example, means we should avoid
+  // using the heap and we should avoid using STL functions.
+  // Temporarily copy the contents of the "program" vector into a
+  // stack-allocated array; and then explicitly destroy that object.
+  // This makes sure we don't ex- or implicitly call new/delete after we
+  // installed the BPF filter program in the kernel. Depending on the
+  // system memory allocator that is in effect, these operators can result
+  // in system calls to things like munmap() or brk().
+  CodeGen::Program* program = AssembleFilter(false).release();
+
+  // The instruction count is passed to the kernel as an unsigned short.
+  // Refuse to continue if the program is empty or if the count does not
+  // survive the narrowing conversion, rather than silently installing a
+  // truncated filter.
+  const unsigned short program_len =
+      static_cast<unsigned short>(program->size());
+  if (program->size() == 0 || program_len != program->size()) {
+    SANDBOX_DIE("BPF program is empty or too large to install");
+  }
+
+  struct sock_filter bpf[program_len];
+  const struct sock_fprog prog = {program_len, bpf};
+  memcpy(bpf, &(*program)[0], sizeof(bpf));
+  delete program;
+
+  // Make an attempt to release memory that is no longer needed here, rather
+  // than in the destructor. Try to avoid as much as possible to presume of
+  // what will be possible to do in the new (sandboxed) execution environment.
+  policy_.reset();
+
+  if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+    SANDBOX_DIE("Kernel refuses to enable no-new-privs");
+  }
+
+  // Install BPF filter program. If the thread state indicates multi-threading
+  // support, then the kernel has the seccomp system call. Otherwise, fall
+  // back on prctl, which requires the process to be single-threaded.
+  if (must_sync_threads) {
+    int rv =
+        sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &prog);
+    if (rv) {
+      SANDBOX_DIE(
+          "Kernel refuses to turn on and synchronize threads for BPF filters");
+    }
+  } else {
+    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
+      SANDBOX_DIE("Kernel refuses to turn on BPF filters");
+    }
+  }
+
+  sandbox_has_started_ = true;
+}
+
+} // namespace sandbox
diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.h b/sandbox/linux/seccomp-bpf/sandbox_bpf.h
new file mode 100644
index 0000000000..96cceb5648
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.h
@@ -0,0 +1,118 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_SANDBOX_BPF_H_
+#define SANDBOX_LINUX_SECCOMP_BPF_SANDBOX_BPF_H_
+
+#include <stdint.h>
+
+#include "base/files/scoped_file.h"
+#include "base/macros.h"
+#include "base/memory/scoped_ptr.h"
+#include "sandbox/linux/bpf_dsl/codegen.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+struct arch_seccomp_data;
+namespace bpf_dsl {
+class Policy;
+}
+
+// This class can be used to apply a syscall sandboxing policy expressed in a
+// bpf_dsl::Policy object to the current process.
+// Syscall sandboxing policies get inherited by subprocesses and, once applied,
+// can never be removed for the lifetime of the process.
+class SANDBOX_EXPORT SandboxBPF {
+ public:
+ enum class SeccompLevel {
+ SINGLE_THREADED,
+ MULTI_THREADED,
+ };
+
+ // Ownership of |policy| is transferred here to the sandbox object.
+ // nullptr is allowed for unit tests.
+ explicit SandboxBPF(bpf_dsl::Policy* policy);
+ // NOTE: Setting a policy and starting the sandbox is a one-way operation.
+ // The kernel does not provide any option for unloading a loaded sandbox. The
+ // sandbox remains engaged even when the object is destructed.
+ ~SandboxBPF();
+
+ // Detect if the kernel supports the specified seccomp level.
+ // See StartSandbox() for a description of these.
+ static bool SupportsSeccompSandbox(SeccompLevel level);
+
+ // This is the main public entry point. It sets up the resources needed by
+ // the sandbox, and enters Seccomp mode.
+ // The calling process must provide a |level| to tell the sandbox which type
+ // of kernel support it should engage.
+ // SINGLE_THREADED will only sandbox the calling thread. Since it would be a
+ // security risk, the sandbox will also check that the current process is
+ // single threaded and crash if it isn't the case.
+ // MULTI_THREADED requires more recent kernel support and allows to sandbox
+ // all the threads of the current process. Be mindful of potential races,
+ // with other threads using disallowed system calls either before or after
+ // the sandbox is engaged.
+ //
+ // It is possible to stack multiple sandboxes by creating separate "Sandbox"
+ // objects and calling "StartSandbox()" on each of them. Please note, that
+ // this requires special care, though, as newly stacked sandboxes can never
+ // relax restrictions imposed by earlier sandboxes. Furthermore, installing
+ // a new policy requires making system calls, that might already be
+ // disallowed.
+ // Finally, stacking does add more kernel overhead than having a single
+ // combined policy. So, it should only be used if there are no alternatives.
+ bool StartSandbox(SeccompLevel level) WARN_UNUSED_RESULT;
+
+ // The sandbox needs to be able to access files in "/proc/self/". If
+ // this directory is not accessible when "StartSandbox()" gets called, the
+ // caller must provide an already opened file descriptor by calling
+ // "SetProcFd()".
+ // The sandbox becomes the new owner of this file descriptor and will
+ // close it when "StartSandbox()" executes or when the sandbox object
+ // disappears.
+ void SetProcFd(base::ScopedFD proc_fd);
+
+ // Checks whether a particular system call number is valid on the current
+ // architecture.
+ static bool IsValidSyscallNumber(int sysnum);
+
+ // UnsafeTraps require some syscalls to always be allowed.
+ // This helper function returns true for these calls.
+ static bool IsRequiredForUnsafeTrap(int sysno);
+
+ // From within an UnsafeTrap() it is often useful to be able to execute
+ // the system call that triggered the trap. The ForwardSyscall() method
+ // makes this easy. It is more efficient than calling glibc's syscall()
+ // function, as it avoids the extra round-trip to the signal handler. And
+ // it automatically does the correct thing to report kernel-style error
+ // conditions, rather than setting errno. See the comments for TrapFnc for
+ // details. In other words, the return value from ForwardSyscall() is
+ // directly suitable as a return value for a trap handler.
+ static intptr_t ForwardSyscall(const struct arch_seccomp_data& args);
+
+ // Assembles a BPF filter program from the current policy. After calling this
+ // function, you must not call any other sandboxing function.
+ // Typically, AssembleFilter() is only used by unit tests and by sandbox
+ // internals. It should not be used by production code.
+ // For performance reasons, we normally only run the assembled BPF program
+ // through the verifier, iff the program was built in debug mode.
+ // But by setting "force_verification", the caller can request that the
+ // verifier is run unconditionally. This is useful for unittests.
+ scoped_ptr<CodeGen::Program> AssembleFilter(bool force_verification);
+
+ private:
+ // Assembles and installs a filter based on the policy that was passed to
+ // the constructor.
+ void InstallFilter(bool must_sync_threads);
+
+ base::ScopedFD proc_fd_;
+ bool sandbox_has_started_;
+ scoped_ptr<bpf_dsl::Policy> policy_;
+
+ DISALLOW_COPY_AND_ASSIGN(SandboxBPF);
+};
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_SANDBOX_BPF_H_
diff --git a/sandbox/linux/seccomp-bpf/syscall.cc b/sandbox/linux/seccomp-bpf/syscall.cc
new file mode 100644
index 0000000000..bc6461f117
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/syscall.cc
@@ -0,0 +1,421 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf/syscall.h"
+
+#include <errno.h>
+#include <stdint.h>
+
+#include "base/logging.h"
+#include "sandbox/linux/bpf_dsl/seccomp_macros.h"
+
+namespace sandbox {
+
+namespace {
+
+#if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \
+ defined(ARCH_CPU_MIPS_FAMILY)
+// Number that's not currently used by any Linux kernel ABIs.
+const int kInvalidSyscallNumber = 0x351d3;
+#else
+#error Unrecognized architecture
+#endif
+
+asm(// We need to be able to tell the kernel exactly where we made a
+ // system call. The C++ compiler likes to sometimes clone or
+ // inline code, which would inadvertently end up duplicating
+ // the entry point.
+ // "gcc" can suppress code duplication with suitable function
+ // attributes, but "clang" doesn't have this ability.
+ // The "clang" developer mailing list suggested that the correct
+ // and portable solution is a file-scope assembly block.
+ // N.B. We do mark our code as a proper function so that backtraces
+ // work correctly. But we make absolutely no attempt to use the
+ // ABI's calling conventions for passing arguments. We will only
+ // ever be called from assembly code and thus can pick more
+ // suitable calling conventions.
+#if defined(__i386__)
+ ".text\n"
+ ".align 16, 0x90\n"
+ ".type SyscallAsm, @function\n"
+ "SyscallAsm:.cfi_startproc\n"
+ // Check if "%eax" is negative. If so, do not attempt to make a
+ // system call. Instead, compute the return address that is visible
+ // to the kernel after we execute "int $0x80". This address can be
+ // used as a marker that BPF code inspects.
+ "test %eax, %eax\n"
+ "jge 1f\n"
+ // Always, make sure that our code is position-independent, or
+ // address space randomization might not work on i386. This means,
+ // we can't use "lea", but instead have to rely on "call/pop".
+ "call 0f; .cfi_adjust_cfa_offset 4\n"
+ "0:pop %eax; .cfi_adjust_cfa_offset -4\n"
+ "addl $2f-0b, %eax\n"
+ "ret\n"
+ // Save register that we don't want to clobber. On i386, we need to
+ // save relatively aggressively, as there are a couple of registers
+ // that are used internally (e.g. %ebx for position-independent
+ // code, and %ebp for the frame pointer), and as we need to keep at
+ // least a few registers available for the register allocator.
+ "1:push %esi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset esi, 0\n"
+ "push %edi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset edi, 0\n"
+ "push %ebx; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebx, 0\n"
+ "push %ebp; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebp, 0\n"
+ // Copy entries from the array holding the arguments into the
+ // correct CPU registers.
+ "movl 0(%edi), %ebx\n"
+ "movl 4(%edi), %ecx\n"
+ "movl 8(%edi), %edx\n"
+ "movl 12(%edi), %esi\n"
+ "movl 20(%edi), %ebp\n"
+ "movl 16(%edi), %edi\n"
+ // Enter the kernel.
+ "int $0x80\n"
+ // This is our "magic" return address that the BPF filter sees.
+ "2:"
+ // Restore any clobbered registers that we didn't declare to the
+ // compiler.
+ "pop %ebp; .cfi_restore ebp; .cfi_adjust_cfa_offset -4\n"
+ "pop %ebx; .cfi_restore ebx; .cfi_adjust_cfa_offset -4\n"
+ "pop %edi; .cfi_restore edi; .cfi_adjust_cfa_offset -4\n"
+ "pop %esi; .cfi_restore esi; .cfi_adjust_cfa_offset -4\n"
+ "ret\n"
+ ".cfi_endproc\n"
+ "9:.size SyscallAsm, 9b-SyscallAsm\n"
+#elif defined(__x86_64__)
+ ".text\n"
+ ".align 16, 0x90\n"
+ ".type SyscallAsm, @function\n"
+ "SyscallAsm:.cfi_startproc\n"
+ // Check if "%rdi" is negative. If so, do not attempt to make a
+ // system call. Instead, compute the return address that is visible
+ // to the kernel after we execute "syscall". This address can be
+ // used as a marker that BPF code inspects.
+ "test %rdi, %rdi\n"
+ "jge 1f\n"
+ // Always make sure that our code is position-independent, or the
+ // linker will throw a hissy fit on x86-64.
+ "lea 2f(%rip), %rax\n"
+ "ret\n"
+ // Now we load the registers used to pass arguments to the system
+ // call: system call number in %rax, and arguments in %rdi, %rsi,
+ // %rdx, %r10, %r8, %r9. Note: These are all caller-save registers
+ // (only %rbx, %rbp, %rsp, and %r12-%r15 are callee-save), so no
+ // need to worry here about spilling registers or CFI directives.
+ "1:movq %rdi, %rax\n"
+ "movq 0(%rsi), %rdi\n"
+ "movq 16(%rsi), %rdx\n"
+ "movq 24(%rsi), %r10\n"
+ "movq 32(%rsi), %r8\n"
+ "movq 40(%rsi), %r9\n"
+ "movq 8(%rsi), %rsi\n"
+ // Enter the kernel.
+ "syscall\n"
+ // This is our "magic" return address that the BPF filter sees.
+ "2:ret\n"
+ ".cfi_endproc\n"
+ "9:.size SyscallAsm, 9b-SyscallAsm\n"
+#elif defined(__arm__)
+ // Throughout this file, we use the same mode (ARM vs. thumb)
+ // that the C++ compiler uses. This means, when transferring control
+ // from C++ to assembly code, we do not need to switch modes (e.g.
+ // by using the "bx" instruction). It also means that our assembly
+ // code should not be invoked directly from code that lives in
+ // other compilation units, as we don't bother implementing thumb
+ // interworking. That's OK, as we don't make any of the assembly
+ // symbols public. They are all local to this file.
+ ".text\n"
+ ".align 2\n"
+ ".type SyscallAsm, %function\n"
+#if defined(__thumb__)
+ ".thumb_func\n"
+#else
+ ".arm\n"
+#endif
+ "SyscallAsm:\n"
+#if !defined(__native_client_nonsfi__)
+ // The .fnstart and .fnend pseudo operations create an unwind table.
+ // They also create a reference to the symbol __aeabi_unwind_cpp_pr0, which
+ // is not provided by the PNaCl toolchain. Disable them.
+ ".fnstart\n"
+#endif
+ "@ args = 0, pretend = 0, frame = 8\n"
+ "@ frame_needed = 1, uses_anonymous_args = 0\n"
+#if defined(__thumb__)
+ ".cfi_startproc\n"
+ "push {r7, lr}\n"
+ ".save {r7, lr}\n"
+ ".cfi_offset 14, -4\n"
+ ".cfi_offset 7, -8\n"
+ ".cfi_def_cfa_offset 8\n"
+#else
+ "stmfd sp!, {fp, lr}\n"
+ "add fp, sp, #4\n"
+#endif
+ // Check if "r0" is negative. If so, do not attempt to make a
+ // system call. Instead, compute the return address that is visible
+ // to the kernel after we execute "swi 0". This address can be
+ // used as a marker that BPF code inspects.
+ "cmp r0, #0\n"
+ "bge 1f\n"
+ "adr r0, 2f\n"
+ "b 2f\n"
+ // We declared (almost) all clobbered registers to the compiler. On
+ // ARM there is no particular register pressure. So, we can go
+ // ahead and directly copy the entries from the arguments array
+ // into the appropriate CPU registers.
+ "1:ldr r5, [r6, #20]\n"
+ "ldr r4, [r6, #16]\n"
+ "ldr r3, [r6, #12]\n"
+ "ldr r2, [r6, #8]\n"
+ "ldr r1, [r6, #4]\n"
+ "mov r7, r0\n"
+ "ldr r0, [r6, #0]\n"
+ // Enter the kernel
+ "swi 0\n"
+// Restore the frame pointer. Also restore the program counter from
+// the link register; this makes us return to the caller.
+#if defined(__thumb__)
+ "2:pop {r7, pc}\n"
+ ".cfi_endproc\n"
+#else
+ "2:ldmfd sp!, {fp, pc}\n"
+#endif
+#if !defined(__native_client_nonsfi__)
+ // Do not use .fnstart and .fnend for PNaCl toolchain. See above comment,
+ // for more details.
+ ".fnend\n"
+#endif
+ "9:.size SyscallAsm, 9b-SyscallAsm\n"
+#elif defined(__mips__)
+ ".text\n"
+ ".align 4\n"
+ ".type SyscallAsm, @function\n"
+ "SyscallAsm:.ent SyscallAsm\n"
+ ".frame $sp, 40, $ra\n"
+ ".set push\n"
+ ".set noreorder\n"
+ "addiu $sp, $sp, -40\n"
+ "sw $ra, 36($sp)\n"
+ // Check if "v0" is negative. If so, do not attempt to make a
+ // system call. Instead, compute the return address that is visible
+ // to the kernel after we execute "syscall". This address can be
+ // used as a marker that BPF code inspects.
+ "bgez $v0, 1f\n"
+ " nop\n"
+ "la $v0, 2f\n"
+ "b 2f\n"
+ " nop\n"
+ // On MIPS first four arguments go to registers a0 - a3 and any
+ // argument after that goes to stack. We can go ahead and directly
+ // copy the entries from the arguments array into the appropriate
+ // CPU registers and on the stack.
+ "1:lw $a3, 28($a0)\n"
+ "lw $a2, 24($a0)\n"
+ "lw $a1, 20($a0)\n"
+ "lw $t0, 16($a0)\n"
+ "sw $a3, 28($sp)\n"
+ "sw $a2, 24($sp)\n"
+ "sw $a1, 20($sp)\n"
+ "sw $t0, 16($sp)\n"
+ "lw $a3, 12($a0)\n"
+ "lw $a2, 8($a0)\n"
+ "lw $a1, 4($a0)\n"
+ "lw $a0, 0($a0)\n"
+ // Enter the kernel
+ "syscall\n"
+ // This is our "magic" return address that the BPF filter sees.
+ // Restore the return address from the stack.
+ "2:lw $ra, 36($sp)\n"
+ "jr $ra\n"
+ " addiu $sp, $sp, 40\n"
+ ".set pop\n"
+ ".end SyscallAsm\n"
+ ".size SyscallAsm,.-SyscallAsm\n"
+#elif defined(__aarch64__)
+ ".text\n"
+ ".align 2\n"
+ ".type SyscallAsm, %function\n"
+ "SyscallAsm:\n"
+ ".cfi_startproc\n"
+ "cmp x0, #0\n"
+ "b.ge 1f\n"
+ "adr x0,2f\n"
+ "b 2f\n"
+ "1:ldr x5, [x6, #40]\n"
+ "ldr x4, [x6, #32]\n"
+ "ldr x3, [x6, #24]\n"
+ "ldr x2, [x6, #16]\n"
+ "ldr x1, [x6, #8]\n"
+ "mov x8, x0\n"
+ "ldr x0, [x6, #0]\n"
+ // Enter the kernel
+ "svc 0\n"
+ "2:ret\n"
+ ".cfi_endproc\n"
+ ".size SyscallAsm, .-SyscallAsm\n"
+#endif
+ ); // asm
+
+#if defined(__x86_64__)
+extern "C" {
+intptr_t SyscallAsm(intptr_t nr, const intptr_t args[6]);
+}
+#endif
+
+} // namespace
+
+intptr_t Syscall::InvalidCall() {
+ // Explicitly pass eight zero arguments just in case.
+ return Call(kInvalidSyscallNumber, 0, 0, 0, 0, 0, 0, 0, 0);
+}
+
+intptr_t Syscall::Call(int nr,
+ intptr_t p0,
+ intptr_t p1,
+ intptr_t p2,
+ intptr_t p3,
+ intptr_t p4,
+ intptr_t p5,
+ intptr_t p6,
+ intptr_t p7) {
+ // We rely on "intptr_t" to be the exact size as a "void *". This is
+ // typically true, but just in case, we add a check. The language
+ // specification allows platforms some leeway in cases, where
+ // "sizeof(void *)" is not the same as "sizeof(void (*)())". We expect
+ // that this would only be an issue for IA64, which we are currently not
+ // planning on supporting. And it is even possible that this would work
+ // on IA64, but for lack of actual hardware, I cannot test.
+ static_assert(sizeof(void*) == sizeof(intptr_t),
+ "pointer types and intptr_t must be exactly the same size");
+
+ // TODO(nedeljko): Enable use of more than six parameters on architectures
+ // where that makes sense.
+#if defined(__mips__)
+ const intptr_t args[8] = {p0, p1, p2, p3, p4, p5, p6, p7};
+#else
+ DCHECK_EQ(p6, 0) << " Support for syscalls with more than six arguments not "
+ "added for this architecture";
+ DCHECK_EQ(p7, 0) << " Support for syscalls with more than six arguments not "
+ "added for this architecture";
+ const intptr_t args[6] = {p0, p1, p2, p3, p4, p5};
+#endif // defined(__mips__)
+
+// Invoke our file-scope assembly code. The constraints have been picked
+// carefully to match what the rest of the assembly code expects in input,
+// output, and clobbered registers.
+#if defined(__i386__)
+ intptr_t ret = nr;
+ asm volatile(
+ "call SyscallAsm\n"
+ // N.B. These are not the calling conventions normally used by the ABI.
+ : "=a"(ret)
+ : "0"(ret), "D"(args)
+ : "cc", "esp", "memory", "ecx", "edx");
+#elif defined(__x86_64__)
+ intptr_t ret = SyscallAsm(nr, args);
+#elif defined(__arm__)
+ intptr_t ret;
+ {
+ register intptr_t inout __asm__("r0") = nr;
+ register const intptr_t* data __asm__("r6") = args;
+ asm volatile(
+ "bl SyscallAsm\n"
+ // N.B. These are not the calling conventions normally used by the ABI.
+ : "=r"(inout)
+ : "0"(inout), "r"(data)
+ : "cc",
+ "lr",
+ "memory",
+ "r1",
+ "r2",
+ "r3",
+ "r4",
+ "r5"
+#if !defined(__thumb__)
+ // In thumb mode, we cannot use "r7" as a general purpose register, as
+ // it is our frame pointer. We have to manually manage and preserve
+ // it.
+ // In ARM mode, we have a dedicated frame pointer register and "r7" is
+ // thus available as a general purpose register. We don't preserve it,
+ // but instead mark it as clobbered.
+ ,
+ "r7"
+#endif // !defined(__thumb__)
+ );
+ ret = inout;
+ }
+#elif defined(__mips__)
+ intptr_t err_status;
+ intptr_t ret = Syscall::SandboxSyscallRaw(nr, args, &err_status);
+
+ if (err_status) {
+ // On error, MIPS returns errno from syscall instead of -errno.
+ // The purpose of this negation is for SandboxSyscall() to behave
+ // more like it would on other architectures.
+ ret = -ret;
+ }
+#elif defined(__aarch64__)
+ intptr_t ret;
+ {
+ register intptr_t inout __asm__("x0") = nr;
+ register const intptr_t* data __asm__("x6") = args;
+ asm volatile("bl SyscallAsm\n"
+ : "=r"(inout)
+ : "0"(inout), "r"(data)
+ : "cc", "memory", "x1", "x2", "x3", "x4", "x5", "x8", "x30");
+ ret = inout;
+ }
+
+#else
+#error "Unimplemented architecture"
+#endif
+ return ret;
+}
+
+void Syscall::PutValueInUcontext(intptr_t ret_val, ucontext_t* ctx) {
+#if defined(__mips__)
+ // Mips ABI states that on error a3 CPU register has non zero value and if
+ // there is no error, it should be zero.
+ if (ret_val <= -1 && ret_val >= -4095) {
+ // |ret_val| follows the Syscall::Call() convention of being -errno on
+ // errors. In order to write correct value to return register this sign
+ // needs to be changed back.
+ ret_val = -ret_val;
+ SECCOMP_PARM4(ctx) = 1;
+ } else
+ SECCOMP_PARM4(ctx) = 0;
+#endif
+ SECCOMP_RESULT(ctx) = static_cast<greg_t>(ret_val);
+}
+
+#if defined(__mips__)
+intptr_t Syscall::SandboxSyscallRaw(int nr,
+ const intptr_t* args,
+ intptr_t* err_ret) {
+ register intptr_t ret __asm__("v0") = nr;
+ // a3 register becomes non zero on error.
+ register intptr_t err_stat __asm__("a3") = 0;
+ {
+ register const intptr_t* data __asm__("a0") = args;
+ asm volatile(
+ "la $t9, SyscallAsm\n"
+ "jalr $t9\n"
+ " nop\n"
+ : "=r"(ret), "=r"(err_stat)
+ : "0"(ret),
+ "r"(data)
+ // a2 is in the clobber list so inline assembly can not change its
+ // value.
+ : "memory", "ra", "t9", "a2");
+ }
+
+ // Set an error status so it can be used outside of this function
+ *err_ret = err_stat;
+
+ return ret;
+}
+#endif // defined(__mips__)
+
+} // namespace sandbox
diff --git a/sandbox/linux/seccomp-bpf/syscall.h b/sandbox/linux/seccomp-bpf/syscall.h
new file mode 100644
index 0000000000..ccfc88dcb3
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/syscall.h
@@ -0,0 +1,166 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_H__
+#define SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_H__
+
+#include <signal.h>
+#include <stdint.h>
+
+#include "base/macros.h"
+#include "sandbox/linux/system_headers/linux_signal.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+
+// This purely static class can be used to perform system calls with some
+// low-level control.
+class SANDBOX_EXPORT Syscall {
+ public:
+ // InvalidCall() invokes Call() with a platform-appropriate syscall
+ // number that is guaranteed to not be implemented (i.e., normally
+ // returns -ENOSYS).
+ // This is primarily meant to be useful for writing sandbox policy
+ // unit tests.
+ static intptr_t InvalidCall();
+
+ // System calls can take up to six parameters (up to eight on some
+ // architectures). Traditionally, glibc
+ // implements this property by using variadic argument lists. This works, but
+ // confuses modern tools such as valgrind, because we are nominally passing
+ // uninitialized data whenever we call through this function and pass less
+ // than the full six arguments.
+ // So, instead, we use C++'s template system to achieve a very similar
+ // effect. C++ automatically sets the unused parameters to zero for us, and
+ // it also does the correct type expansion (e.g. from 32bit to 64bit) where
+ // necessary.
+ // We have to use C-style cast operators as we want to be able to accept both
+ // integer and pointer types.
+ template <class T0,
+ class T1,
+ class T2,
+ class T3,
+ class T4,
+ class T5,
+ class T6,
+ class T7>
+ static inline intptr_t
+ Call(int nr, T0 p0, T1 p1, T2 p2, T3 p3, T4 p4, T5 p5, T6 p6, T7 p7) {
+ return Call(nr,
+ (intptr_t)p0,
+ (intptr_t)p1,
+ (intptr_t)p2,
+ (intptr_t)p3,
+ (intptr_t)p4,
+ (intptr_t)p5,
+ (intptr_t)p6,
+ (intptr_t)p7);
+ }
+
+ template <class T0,
+ class T1,
+ class T2,
+ class T3,
+ class T4,
+ class T5,
+ class T6>
+ static inline intptr_t
+ Call(int nr, T0 p0, T1 p1, T2 p2, T3 p3, T4 p4, T5 p5, T6 p6) {
+ return Call(nr,
+ (intptr_t)p0,
+ (intptr_t)p1,
+ (intptr_t)p2,
+ (intptr_t)p3,
+ (intptr_t)p4,
+ (intptr_t)p5,
+ (intptr_t)p6,
+ 0);
+ }
+
+ template <class T0, class T1, class T2, class T3, class T4, class T5>
+ static inline intptr_t
+ Call(int nr, T0 p0, T1 p1, T2 p2, T3 p3, T4 p4, T5 p5) {
+ return Call(nr,
+ (intptr_t)p0,
+ (intptr_t)p1,
+ (intptr_t)p2,
+ (intptr_t)p3,
+ (intptr_t)p4,
+ (intptr_t)p5,
+ 0,
+ 0);
+ }
+
+ template <class T0, class T1, class T2, class T3, class T4>
+ static inline intptr_t Call(int nr, T0 p0, T1 p1, T2 p2, T3 p3, T4 p4) {
+ return Call(nr, p0, p1, p2, p3, p4, 0, 0, 0);
+ }
+
+ template <class T0, class T1, class T2, class T3>
+ static inline intptr_t Call(int nr, T0 p0, T1 p1, T2 p2, T3 p3) {
+ return Call(nr, p0, p1, p2, p3, 0, 0, 0, 0);
+ }
+
+ template <class T0, class T1, class T2>
+ static inline intptr_t Call(int nr, T0 p0, T1 p1, T2 p2) {
+ return Call(nr, p0, p1, p2, 0, 0, 0, 0, 0);
+ }
+
+ template <class T0, class T1>
+ static inline intptr_t Call(int nr, T0 p0, T1 p1) {
+ return Call(nr, p0, p1, 0, 0, 0, 0, 0, 0);
+ }
+
+ template <class T0>
+ static inline intptr_t Call(int nr, T0 p0) {
+ return Call(nr, p0, 0, 0, 0, 0, 0, 0, 0);
+ }
+
+ static inline intptr_t Call(int nr) {
+ return Call(nr, 0, 0, 0, 0, 0, 0, 0, 0);
+ }
+
+ // Set the registers in |ctx| to match what they would be after a system call
+ // returning |ret_val|. |ret_val| must follow the Syscall::Call() convention
+ // of being -errno on errors.
+ static void PutValueInUcontext(intptr_t ret_val, ucontext_t* ctx);
+
+ private:
+ // This performs system call |nr| with the arguments p0 to p7 from a constant
+ // userland address, which is for instance observable by seccomp-bpf filters.
+ // The constant userland address from which these system calls are made will
+ // be returned if |nr| is passed as -1.
+ // On error, this function will return a value between -1 and -4095 which
+ // should be interpreted as -errno.
+ static intptr_t Call(int nr,
+ intptr_t p0,
+ intptr_t p1,
+ intptr_t p2,
+ intptr_t p3,
+ intptr_t p4,
+ intptr_t p5,
+ intptr_t p6,
+ intptr_t p7);
+
+#if defined(__mips__)
+ // This function basically does on MIPS what SandboxSyscall() is doing on
+ // other architectures. However, because of specificity of MIPS regarding
+ // handling syscall errors, SandboxSyscall() is made as a wrapper for this
+ // function in order for SandboxSyscall() to behave more like on other
+ // architectures on places where return value from SandboxSyscall() is used
+ // directly (like in most tests).
+ // The syscall "nr" is called with the arguments stored in the array that
+ // "args" points to, and information on whether there was an error or not
+ // is returned to SandboxSyscall() by err_stat.
+ static intptr_t SandboxSyscallRaw(int nr,
+ const intptr_t* args,
+ intptr_t* err_stat);
+#endif // defined(__mips__)
+
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Syscall);
+};
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_H__
diff --git a/sandbox/linux/seccomp-bpf/syscall_unittest.cc b/sandbox/linux/seccomp-bpf/syscall_unittest.cc
new file mode 100644
index 0000000000..5fdee6c495
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/syscall_unittest.cc
@@ -0,0 +1,240 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf/syscall.h"
+
+#include <asm/unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <vector>
+
+#include "base/posix/eintr_wrapper.h"
+#include "build/build_config.h"
+#include "sandbox/linux/bpf_dsl/bpf_dsl.h"
+#include "sandbox/linux/bpf_dsl/policy.h"
+#include "sandbox/linux/seccomp-bpf/bpf_tests.h"
+#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
+#include "sandbox/linux/tests/unit_tests.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using sandbox::bpf_dsl::Allow;
+using sandbox::bpf_dsl::ResultExpr;
+using sandbox::bpf_dsl::Trap;
+
+namespace sandbox {
+
+namespace {
+
+// Different platforms use different symbols for the six-argument version
+// of the mmap() system call. Test for the correct symbol at compile time.
+#ifdef __NR_mmap2
+const int kMMapNr = __NR_mmap2;
+#else
+const int kMMapNr = __NR_mmap;
+#endif
+
+TEST(Syscall, InvalidCallReturnsENOSYS) {
+ EXPECT_EQ(-ENOSYS, Syscall::InvalidCall());
+}
+
+TEST(Syscall, WellKnownEntryPoint) {
+// Test that Syscall::Call(-1) is handled specially. Don't do this on ARM,
+// where syscall(-1) crashes with SIGILL. Not running the test is fine, as we
+// are still testing ARM code in the next set of tests.
+#if !defined(__arm__) && !defined(__aarch64__)
+ EXPECT_NE(Syscall::Call(-1), syscall(-1));
+#endif
+
+// If possible, test that Syscall::Call(-1) returns the address right
+// after
+// a kernel entry point.
+#if defined(__i386__)
+ EXPECT_EQ(0x80CDu, ((uint16_t*)Syscall::Call(-1))[-1]); // INT 0x80
+#elif defined(__x86_64__)
+ EXPECT_EQ(0x050Fu, ((uint16_t*)Syscall::Call(-1))[-1]); // SYSCALL
+#elif defined(__arm__)
+#if defined(__thumb__)
+ EXPECT_EQ(0xDF00u, ((uint16_t*)Syscall::Call(-1))[-1]); // SWI 0
+#else
+ EXPECT_EQ(0xEF000000u, ((uint32_t*)Syscall::Call(-1))[-1]); // SVC 0
+#endif
+#elif defined(__mips__)
+ // Opcode for MIPS syscall is in the lower 16-bits
+ EXPECT_EQ(0x0cu, (((uint32_t*)Syscall::Call(-1))[-1]) & 0x0000FFFF);
+#elif defined(__aarch64__)
+ EXPECT_EQ(0xD4000001u, ((uint32_t*)Syscall::Call(-1))[-1]); // SVC 0
+#else
+#warning Incomplete test case; need port for target platform
+#endif
+}
+
+TEST(Syscall, TrivialSyscallNoArgs) {
+ // Test that we can do basic system calls
+ EXPECT_EQ(Syscall::Call(__NR_getpid), syscall(__NR_getpid));
+}
+
+TEST(Syscall, TrivialSyscallOneArg) {
+ int new_fd;
+ // Duplicate standard error and close it.
+ ASSERT_GE(new_fd = Syscall::Call(__NR_dup, 2), 0);
+ int close_return_value = IGNORE_EINTR(Syscall::Call(__NR_close, new_fd));
+ ASSERT_EQ(close_return_value, 0);
+}
+
+TEST(Syscall, TrivialFailingSyscall) {
+ errno = -42;
+ int ret = Syscall::Call(__NR_dup, -1);
+ ASSERT_EQ(-EBADF, ret);
+ // Verify that Syscall::Call does not touch errno.
+ ASSERT_EQ(-42, errno);
+}
+
+// SIGSYS trap handler that will be called on __NR_uname.
+intptr_t CopySyscallArgsToAux(const struct arch_seccomp_data& args, void* aux) {
+ // |aux| is our BPF_AUX pointer.
+ std::vector<uint64_t>* const seen_syscall_args =
+ static_cast<std::vector<uint64_t>*>(aux);
+ BPF_ASSERT(arraysize(args.args) == 6);
+ seen_syscall_args->assign(args.args, args.args + arraysize(args.args));
+ return -ENOMEM;
+}
+
+class CopyAllArgsOnUnamePolicy : public bpf_dsl::Policy {
+ public:
+ explicit CopyAllArgsOnUnamePolicy(std::vector<uint64_t>* aux) : aux_(aux) {}
+ ~CopyAllArgsOnUnamePolicy() override {}
+
+ ResultExpr EvaluateSyscall(int sysno) const override {
+ DCHECK(SandboxBPF::IsValidSyscallNumber(sysno));
+ if (sysno == __NR_uname) {
+ return Trap(CopySyscallArgsToAux, aux_);
+ } else {
+ return Allow();
+ }
+ }
+
+ private:
+ std::vector<uint64_t>* aux_;
+
+ DISALLOW_COPY_AND_ASSIGN(CopyAllArgsOnUnamePolicy);
+};
+
+// We are testing Syscall::Call() by making use of a BPF filter that
+// allows us
+// to inspect the system call arguments that the kernel saw.
+BPF_TEST(Syscall,
+ SyntheticSixArgs,
+ CopyAllArgsOnUnamePolicy,
+ std::vector<uint64_t> /* (*BPF_AUX) */) {
+ const int kExpectedValue = 42;
+ // In this test we only pass integers to the kernel. We might want to make
+ // additional tests to try other types. What we will see depends on
+ // implementation details of kernel BPF filters and we will need to document
+ // the expected behavior very clearly.
+ int syscall_args[6];
+ for (size_t i = 0; i < arraysize(syscall_args); ++i) {
+ syscall_args[i] = kExpectedValue + i;
+ }
+
+ // We could use pretty much any system call we don't need here. uname() is
+ // nice because it doesn't have any dangerous side effects.
+ BPF_ASSERT(Syscall::Call(__NR_uname,
+ syscall_args[0],
+ syscall_args[1],
+ syscall_args[2],
+ syscall_args[3],
+ syscall_args[4],
+ syscall_args[5]) == -ENOMEM);
+
+ // We expect the trap handler to have copied the 6 arguments.
+ BPF_ASSERT(BPF_AUX->size() == 6);
+
+ // Don't loop here so that we can see which argument does cause the failure
+ // easily from the failing line.
+ // uint64_t is the type passed to our SIGSYS handler.
+ BPF_ASSERT((*BPF_AUX)[0] == static_cast<uint64_t>(syscall_args[0]));
+ BPF_ASSERT((*BPF_AUX)[1] == static_cast<uint64_t>(syscall_args[1]));
+ BPF_ASSERT((*BPF_AUX)[2] == static_cast<uint64_t>(syscall_args[2]));
+ BPF_ASSERT((*BPF_AUX)[3] == static_cast<uint64_t>(syscall_args[3]));
+ BPF_ASSERT((*BPF_AUX)[4] == static_cast<uint64_t>(syscall_args[4]));
+ BPF_ASSERT((*BPF_AUX)[5] == static_cast<uint64_t>(syscall_args[5]));
+}
+
+TEST(Syscall, ComplexSyscallSixArgs) {
+ int fd;
+ ASSERT_LE(0,
+ fd = Syscall::Call(__NR_openat, AT_FDCWD, "/dev/null", O_RDWR, 0L));
+
+ // Use mmap() to allocate some read-only memory
+ char* addr0;
+ ASSERT_NE(
+ (char*)NULL,
+ addr0 = reinterpret_cast<char*>(Syscall::Call(kMMapNr,
+ (void*)NULL,
+ 4096,
+ PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ fd,
+ 0L)));
+
+ // Try to replace the existing mapping with a read-write mapping
+ char* addr1;
+ ASSERT_EQ(addr0,
+ addr1 = reinterpret_cast<char*>(
+ Syscall::Call(kMMapNr,
+ addr0,
+ 4096L,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ fd,
+ 0L)));
+ ++*addr1; // This should not seg fault
+
+ // Clean up
+ EXPECT_EQ(0, Syscall::Call(__NR_munmap, addr1, 4096L));
+ EXPECT_EQ(0, IGNORE_EINTR(Syscall::Call(__NR_close, fd)));
+
+ // Check that the offset argument (i.e. the sixth argument) is processed
+ // correctly.
+ ASSERT_GE(
+ fd = Syscall::Call(__NR_openat, AT_FDCWD, "/proc/self/exe", O_RDONLY, 0L),
+ 0);
+ char* addr2, *addr3;
+ ASSERT_NE((char*)NULL,
+ addr2 = reinterpret_cast<char*>(Syscall::Call(
+ kMMapNr, (void*)NULL, 8192L, PROT_READ, MAP_PRIVATE, fd, 0L)));
+ ASSERT_NE((char*)NULL,
+ addr3 = reinterpret_cast<char*>(Syscall::Call(kMMapNr,
+ (void*)NULL,
+ 4096L,
+ PROT_READ,
+ MAP_PRIVATE,
+ fd,
+#if defined(__NR_mmap2)
+ 1L
+#else
+ 4096L
+#endif
+ )));
+ EXPECT_EQ(0, memcmp(addr2 + 4096, addr3, 4096));
+
+ // Just to be absolutely on the safe side, also verify that the file
+ // contents matches what we are getting from a read() operation.
+ char buf[8192];
+ EXPECT_EQ(8192, Syscall::Call(__NR_read, fd, buf, 8192L));
+ EXPECT_EQ(0, memcmp(addr2, buf, 8192));
+
+ // Clean up
+ EXPECT_EQ(0, Syscall::Call(__NR_munmap, addr2, 8192L));
+ EXPECT_EQ(0, Syscall::Call(__NR_munmap, addr3, 4096L));
+ EXPECT_EQ(0, IGNORE_EINTR(Syscall::Call(__NR_close, fd)));
+}
+
+} // namespace
+
+} // namespace sandbox
diff --git a/sandbox/linux/seccomp-bpf/trap.cc b/sandbox/linux/seccomp-bpf/trap.cc
new file mode 100644
index 0000000000..8f559e53b1
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/trap.cc
@@ -0,0 +1,390 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf/trap.h"
+
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/syscall.h>
+
+#include <algorithm>
+#include <limits>
+
+#include "base/compiler_specific.h"
+#include "base/logging.h"
+#include "build/build_config.h"
+#include "sandbox/linux/bpf_dsl/seccomp_macros.h"
+#include "sandbox/linux/seccomp-bpf/die.h"
+#include "sandbox/linux/seccomp-bpf/syscall.h"
+#include "sandbox/linux/services/syscall_wrappers.h"
+#include "sandbox/linux/system_headers/linux_seccomp.h"
+#include "sandbox/linux/system_headers/linux_signal.h"
+
+namespace {
+
+struct arch_sigsys {  // Filled by memcpy from siginfo's _sifields in Trap::SigSys().
+ void* ip;  // Compared against SECCOMP_IP(ctx) in Trap::SigSys().
+ int nr;  // Compared against SECCOMP_SYSCALL(ctx) in Trap::SigSys().
+ unsigned int arch;  // Compared against SECCOMP_ARCH in Trap::SigSys().
+};
+
+const int kCapacityIncrement = 20;  // Slots added each time trap_array_ grows.
+
+// Unsafe traps can only be turned on if the user explicitly allowed them by
+// setting CHROME_SANDBOX_DEBUGGING to a non-empty value in the environment.
+const char kSandboxDebuggingEnv[] = "CHROME_SANDBOX_DEBUGGING";
+
+// We need to tell whether we are performing a "normal" callback, or
+// whether we were called recursively from within a UnsafeTrap() callback.
+// This is a little tricky to do, because we need to somehow get access to
+// per-thread data from within a signal context. Normal TLS storage is not
+// safely accessible at this time. We could roll our own, but that involves
+// a lot of complexity. Instead, we co-opt one bit in the signal mask.
+// If BUS is blocked, we assume that we have been called recursively.
+// There is a possibility for collision with other code that needs to do
+// this, but in practice the risks are low.
+// If SIGBUS turns out to be a problem, we could instead co-opt one of the
+// realtime signals. There are plenty of them. Unfortunately, there is no
+// way to mark a signal as allocated. So, the potential for collision is
+// possibly even worse.
+bool GetIsInSigHandler(const ucontext_t* ctx) {
+  // Android's sigismember() takes a non-const sigset_t*, so the constness of
+  // the saved mask has to be cast away before querying it.
+  sigset_t* saved_mask = const_cast<sigset_t*>(&ctx->uc_sigmask);
+  return sigismember(saved_mask, LINUX_SIGBUS);
+}
+
+// Adds SIGBUS to the blocked signal mask. GetIsInSigHandler() looks for that
+// same signal in the mask stored in a ucontext_t. Dies if any step fails.
+void SetIsInSigHandler() {
+  sigset_t bus_only;
+  const bool failed =
+      sigemptyset(&bus_only) || sigaddset(&bus_only, LINUX_SIGBUS) ||
+      sandbox::sys_sigprocmask(LINUX_SIG_BLOCK, &bus_only, NULL);
+  if (failed) {
+    SANDBOX_DIE("Failed to block SIGBUS");
+  }
+}
+
+// Returns true only if |sa| neither has SA_SIGINFO set nor names a handler
+// other than SIG_DFL.
+bool IsDefaultSignalAction(const struct sigaction& sa) {
+  const bool wants_siginfo = (sa.sa_flags & SA_SIGINFO) != 0;
+  const bool has_custom_handler = sa.sa_handler != SIG_DFL;
+  return !wants_siginfo && !has_custom_handler;
+}
+
+} // namespace
+
+namespace sandbox {
+
+Trap::Trap()
+ : trap_array_(NULL),
+ trap_array_size_(0),
+ trap_array_capacity_(0),
+ has_unsafe_traps_(false) {
+ // Install SigSysAction as the new SIGSYS handler.
+ struct sigaction sa = {};
+ // In some toolchains, sa_sigaction is not declared in struct sigaction.
+ // So, cast the pointer to sa_handler's type here. This works because
+ // |sa_handler| and |sa_sigaction| share the same memory.
+ sa.sa_handler = reinterpret_cast<void (*)(int)>(SigSysAction);
+ sa.sa_flags = LINUX_SA_SIGINFO | LINUX_SA_NODEFER;
+ struct sigaction old_sa = {};
+ if (sys_sigaction(LINUX_SIGSYS, &sa, &old_sa) < 0) {
+ SANDBOX_DIE("Failed to configure SIGSYS handler");
+ }
+
+ if (!IsDefaultSignalAction(old_sa)) {  // Something else had claimed SIGSYS.
+ static const char kExistingSIGSYSMsg[] =
+ "Existing signal handler when trying to install SIGSYS. SIGSYS needs "
+ "to be reserved for seccomp-bpf.";
+ DLOG(FATAL) << kExistingSIGSYSMsg;
+ LOG(ERROR) << kExistingSIGSYSMsg;
+ }
+
+ // Unmask SIGSYS so that the handler installed above can actually run.
+ sigset_t mask;
+ if (sigemptyset(&mask) || sigaddset(&mask, LINUX_SIGSYS) ||
+ sys_sigprocmask(LINUX_SIG_UNBLOCK, &mask, NULL)) {
+ SANDBOX_DIE("Failed to configure SIGSYS handler");
+ }
+}
+
+// Returns the process-wide Trap instance, creating it on first use.
+//
+// Note: This class is not thread safe. It is the caller's responsibility
+// to avoid race conditions. Normally, this is a non-issue as the sandbox
+// can only be initialized if there are no other threads present.
+// Also, this is not a normal singleton. Once created, the global trap
+// object must never be destroyed again.
+bpf_dsl::TrapRegistry* Trap::Registry() {
+  if (global_trap_) {
+    return global_trap_;
+  }
+
+  global_trap_ = new Trap();
+  if (!global_trap_) {
+    SANDBOX_DIE("Failed to allocate global trap handler");
+  }
+  return global_trap_;
+}
+
+// Raw SIGSYS entry point installed by the Trap constructor. Forwards the
+// signal to SigSys() on the global Trap instance.
+void Trap::SigSysAction(int nr, LinuxSigInfo* info, void* void_context) {
+  // The signal context most notably gives us access to all CPU registers at
+  // the time of the signal.
+  ucontext_t* const ucontext = reinterpret_cast<ucontext_t*>(void_context);
+
+  if (info) {
+    MSAN_UNPOISON(info, sizeof(*info));
+  }
+  if (ucontext) {
+    MSAN_UNPOISON(ucontext, sizeof(*ucontext));
+  }
+
+  if (!global_trap_) {
+    RAW_SANDBOX_DIE(
+        "This can't happen. Found no global singleton instance "
+        "for Trap() handling.");
+  }
+  global_trap_->SigSys(nr, info, ucontext);
+}
+
+// Handles one SIGSYS delivery on behalf of SigSysAction().
+//
+// After validating that the signal looks like a seccomp-bpf trap, this either
+//  - re-issues the system call directly, when unsafe traps are enabled and
+//    SIGBUS is found blocked in |ctx| (our marker for "called recursively
+//    from within an UnsafeTrap() callback"), or
+//  - invokes the TrapFnc registered under the id carried in info->si_errno.
+// The result is written back into |ctx| as the system call's return value.
+// errno is restored to its value on entry before returning.
+//
+// |nr|   signal number; anything but SIGSYS is logged and ignored.
+// |info| siginfo for the signal; may be null, in which case the signal is
+//        logged and ignored.
+// |ctx|  signal context; may be null, in which case the signal is logged and
+//        ignored.
+void Trap::SigSys(int nr, LinuxSigInfo* info, ucontext_t* ctx) {
+  // Signal handlers should always preserve "errno". Otherwise, we could
+  // trigger really subtle bugs.
+  const int old_errno = errno;
+
+  // Various sanity checks to make sure we actually received a signal
+  // triggered by a BPF filter. If something else triggered SIGSYS
+  // (e.g. kill()), there is really nothing we can do with this signal.
+  // |info| and |ctx| are null-checked before any of their fields are read.
+  if (nr != LINUX_SIGSYS || !info || !ctx ||
+      info->si_code != SYS_SECCOMP || info->si_errno <= 0 ||
+      static_cast<size_t>(info->si_errno) > trap_array_size_) {
+    // ATI drivers seem to send SIGSYS, so this cannot be FATAL.
+    // See crbug.com/178166.
+    // TODO(jln): add a DCHECK or move back to FATAL.
+    RAW_LOG(ERROR, "Unexpected SIGSYS received.");
+    errno = old_errno;
+    return;
+  }
+
+  // Obtain the siginfo information that is specific to SIGSYS. Unfortunately,
+  // most versions of glibc don't include this information in siginfo_t. So,
+  // we need to explicitly copy it into a arch_sigsys structure.
+  struct arch_sigsys sigsys;
+  memcpy(&sigsys, &info->_sifields, sizeof(sigsys));
+
+#if defined(__mips__)
+  // When indirect syscall (syscall(__NR_foo, ...)) is made on Mips, the
+  // number in register SECCOMP_SYSCALL(ctx) is always __NR_syscall and the
+  // real number of a syscall (__NR_foo) is in SECCOMP_PARM1(ctx)
+  bool sigsys_nr_is_bad = sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx)) &&
+                          sigsys.nr != static_cast<int>(SECCOMP_PARM1(ctx));
+#else
+  bool sigsys_nr_is_bad = sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx));
+#endif
+
+  // Some more sanity checks: what the kernel reported must match the
+  // register state captured in the signal context.
+  if (sigsys.ip != reinterpret_cast<void*>(SECCOMP_IP(ctx)) ||
+      sigsys_nr_is_bad || sigsys.arch != SECCOMP_ARCH) {
+    // TODO(markus):
+    // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal
+    // safe and can lead to bugs. We should eventually implement a different
+    // logging and reporting mechanism that is safe to be called from
+    // the sigSys() handler.
+    RAW_SANDBOX_DIE("Sanity checks are failing after receiving SIGSYS.");
+  }
+
+  intptr_t rc;
+  if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) {
+    // Recursive entry from an UnsafeTrap() callback: perform the system call
+    // ourselves, with the caller's errno in place.
+    errno = old_errno;
+    if (sigsys.nr == __NR_clone) {
+      RAW_SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler.");
+    }
+#if defined(__mips__)
+    // Mips supports up to eight arguments for syscall.
+    // However, seccomp bpf can filter only up to six arguments, so using eight
+    // arguments has sense only when using UnsafeTrap() handler.
+    rc = Syscall::Call(SECCOMP_SYSCALL(ctx),
+                       SECCOMP_PARM1(ctx),
+                       SECCOMP_PARM2(ctx),
+                       SECCOMP_PARM3(ctx),
+                       SECCOMP_PARM4(ctx),
+                       SECCOMP_PARM5(ctx),
+                       SECCOMP_PARM6(ctx),
+                       SECCOMP_PARM7(ctx),
+                       SECCOMP_PARM8(ctx));
+#else
+    rc = Syscall::Call(SECCOMP_SYSCALL(ctx),
+                       SECCOMP_PARM1(ctx),
+                       SECCOMP_PARM2(ctx),
+                       SECCOMP_PARM3(ctx),
+                       SECCOMP_PARM4(ctx),
+                       SECCOMP_PARM5(ctx),
+                       SECCOMP_PARM6(ctx));
+#endif  // defined(__mips__)
+  } else {
+    // Ids are 1-based (see Add()), hence the "- 1".
+    const TrapKey& trap = trap_array_[info->si_errno - 1];
+    if (!trap.safe) {
+      SetIsInSigHandler();
+    }
+
+    // Copy the seccomp-specific data into a arch_seccomp_data structure. This
+    // is what we are showing to TrapFnc callbacks that the system call
+    // evaluator registered with the sandbox.
+    struct arch_seccomp_data data = {
+        static_cast<int>(SECCOMP_SYSCALL(ctx)),
+        SECCOMP_ARCH,
+        reinterpret_cast<uint64_t>(sigsys.ip),
+        {static_cast<uint64_t>(SECCOMP_PARM1(ctx)),
+         static_cast<uint64_t>(SECCOMP_PARM2(ctx)),
+         static_cast<uint64_t>(SECCOMP_PARM3(ctx)),
+         static_cast<uint64_t>(SECCOMP_PARM4(ctx)),
+         static_cast<uint64_t>(SECCOMP_PARM5(ctx)),
+         static_cast<uint64_t>(SECCOMP_PARM6(ctx))}};
+
+    // Now call the TrapFnc callback associated with this particular instance
+    // of SECCOMP_RET_TRAP.
+    rc = trap.fnc(data, const_cast<void*>(trap.aux));
+  }
+
+  // Update the CPU register that stores the return code of the system call
+  // that we just handled, and restore "errno" to the value that it had
+  // before entering the signal handler.
+  Syscall::PutValueInUcontext(rc, ctx);
+  errno = old_errno;
+}
+
+// Orders TrapKeys by |fnc|, then |aux|, then |safe|.
+bool Trap::TrapKey::operator<(const TrapKey& o) const {
+  if (fnc != o.fnc) {
+    return fnc < o.fnc;
+  }
+  if (aux != o.aux) {
+    return aux < o.aux;
+  }
+  return safe < o.safe;
+}
+
+uint16_t Trap::Add(TrapFnc fnc, const void* aux, bool safe) {
+ if (!safe && !SandboxDebuggingAllowedByUser()) {
+ // Unless the user set the CHROME_SANDBOX_DEBUGGING environment variable,
+ // we never return an ErrorCode that is marked as "unsafe". This also
+ // means, the BPF compiler will never emit code that allows unsafe system
+ // calls to by-pass the filter (because they use the magic return address
+ // from Syscall::Call(-1)).
+
+ // This SANDBOX_DIE() can optionally be removed. It won't break security,
+ // but it might make error messages from the BPF compiler a little harder
+ // to understand. Removing the SANDBOX_DIE() allows callers to easily check
+ // whether unsafe traps are supported (by checking whether the returned
+ // ErrorCode is ET_INVALID).
+ SANDBOX_DIE(
+ "Cannot use unsafe traps unless CHROME_SANDBOX_DEBUGGING "
+ "is enabled");
+
+ return 0;
+ }
+
+ // Each unique combination of TrapFnc, auxiliary data and |safe| flag makes
+ // up a distinct instance of a SECCOMP_RET_TRAP.
+ TrapKey key(fnc, aux, safe);
+
+ // We return unique identifiers together with SECCOMP_RET_TRAP. This allows
+ // us to associate trap with the appropriate handler. The kernel allows us
+ // identifiers in the range from 0 to SECCOMP_RET_DATA (0xFFFF). We want to
+ // avoid 0, as it could be confused for a trap without any specific id.
+ // The nice thing about sequentially numbered identifiers is that we can also
+ // trivially look them up from our signal handler without making any system
+ // calls that might be async-signal-unsafe.
+ // In order to do so, we store all of our traps in a C-style trap_array_.
+
+ TrapIds::const_iterator iter = trap_ids_.find(key);
+ if (iter != trap_ids_.end()) {
+ // We have seen this key before. Return the same id that we assigned
+ // earlier.
+ return iter->second;
+ }
+
+ // This is a new key. Remember it and assign a new id.
+ if (trap_array_size_ >= SECCOMP_RET_DATA /* 0xFFFF */ ||
+ trap_array_size_ >= std::numeric_limits<uint16_t>::max()) {
+ // In practice, this is pretty much impossible to trigger, as there
+ // are other kernel limitations that restrict overall BPF program sizes.
+ SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances");
+ }
+
+ // Our callers ensure that there are no other threads accessing trap_array_
+ // concurrently (typically this is done by ensuring that we are single-
+ // threaded while the sandbox is being set up). But we nonetheless are
+ // modifying a live data structure that could be accessed any time a
+ // system call is made; as system calls could be triggering SIGSYS.
+ // So, we have to be extra careful that we update trap_array_ atomically.
+ // In particular, this means we shouldn't be using realloc() to resize it.
+ // Instead, we allocate a new array, copy the values, and then switch the
+ // pointer. We only really care about the pointer being updated atomically
+ // and the data that is pointed to being valid, as these are the only
+ // values accessed from the signal handler. It is OK if trap_array_size_
+ // is inconsistent with the pointer, as it is monotonically increasing.
+ // Also, we only care about compiler barriers, as the signal handler is
+ // triggered synchronously from a system call. We don't have to protect
+ // against issues with the memory model or with completely asynchronous
+ // events.
+ if (trap_array_size_ >= trap_array_capacity_) {
+ trap_array_capacity_ += kCapacityIncrement;
+ TrapKey* old_trap_array = trap_array_;
+ TrapKey* new_trap_array = new TrapKey[trap_array_capacity_];
+ std::copy_n(old_trap_array, trap_array_size_, new_trap_array);
+
+ // Language specs are unclear on whether the compiler is allowed to move
+ // the "delete[]" above our preceding assignments and/or memory moves,
+ // iff the compiler believes that "delete[]" doesn't have any other
+ // global side-effects.
+ // We insert optimization barriers to prevent this from happening.
+ // The first barrier is probably not needed, but better be explicit in
+ // what we want to tell the compiler.
+ // The clang developer mailing list couldn't answer whether this is a
+ // legitimate worry; but they at least thought that the barrier is
+ // sufficient to prevent the (so far hypothetical) problem of re-ordering
+ // of instructions by the compiler.
+ //
+ // TODO(mdempsky): Try to clean this up using base/atomicops or C++11
+ // atomics; see crbug.com/414363.
+ asm volatile("" : "=r"(new_trap_array) : "0"(new_trap_array) : "memory");
+ trap_array_ = new_trap_array;
+ asm volatile("" : "=r"(trap_array_) : "0"(trap_array_) : "memory");
+
+ delete[] old_trap_array;
+ }
+
+ uint16_t id = trap_array_size_ + 1;  // Ids are 1-based; 0 is never assigned.
+ trap_ids_[key] = id;
+ trap_array_[trap_array_size_] = key;  // Slot (id - 1), as SigSys() expects.
+ trap_array_size_++;
+ return id;
+}
+
+// Returns true iff the CHROME_SANDBOX_DEBUGGING environment variable exists
+// and holds a non-empty value.
+bool Trap::SandboxDebuggingAllowedByUser() {
+  const char* const value = getenv(kSandboxDebuggingEnv);
+  if (!value) {
+    return false;
+  }
+  return value[0] != '\0';
+}
+
+// Tries to switch on unsafe traps and returns the resulting (possibly
+// unchanged) value of has_unsafe_traps_.
+//
+// Unsafe traps are a one-way fuse. Once enabled, they can never be turned
+// off again.
+bool Trap::EnableUnsafeTraps() {
+  if (has_unsafe_traps_) {
+    // Already enabled earlier; nothing to do and nothing to print.
+    return has_unsafe_traps_;
+  }
+
+  // We only allow enabling unsafe traps, if the user explicitly set an
+  // appropriate environment variable. This prevents bugs that accidentally
+  // disable all sandboxing for all users.
+  if (!SandboxDebuggingAllowedByUser()) {
+    SANDBOX_INFO(
+        "Cannot disable sandbox and use unsafe traps unless "
+        "CHROME_SANDBOX_DEBUGGING is turned on first");
+    return has_unsafe_traps_;
+  }
+
+  // We only ever print this message once, when we enable unsafe traps the
+  // first time.
+  SANDBOX_INFO("WARNING! Disabling sandbox for debugging purposes");
+  has_unsafe_traps_ = true;
+  return has_unsafe_traps_;
+}
+
+Trap* Trap::global_trap_;  // Set once by Registry(); never freed afterwards.
+
+} // namespace sandbox
diff --git a/sandbox/linux/seccomp-bpf/trap.h b/sandbox/linux/seccomp-bpf/trap.h
new file mode 100644
index 0000000000..50ac3fd1c3
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/trap.h
@@ -0,0 +1,85 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SECCOMP_BPF_TRAP_H__
+#define SANDBOX_LINUX_SECCOMP_BPF_TRAP_H__
+
+#include <stdint.h>
+
+#include <map>
+
+#include "base/macros.h"
+#include "sandbox/linux/bpf_dsl/trap_registry.h"
+#include "sandbox/linux/system_headers/linux_signal.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+
+// The Trap class allows a BPF filter program to branch out to user space by
+// raising a SIGSYS signal.
+// N.B.: This class does not perform any synchronization operations. If
+// modifications are made to any of the traps, it is the caller's
+// responsibility to ensure that this happens in a thread-safe fashion.
+// Preferably, that means that no other threads should be running at that
+// time. For the purposes of our sandbox, this assertion should always be
+// true. Threads are incompatible with the seccomp sandbox anyway.
+class SANDBOX_EXPORT Trap : public bpf_dsl::TrapRegistry {
+ public:
+ uint16_t Add(TrapFnc fnc, const void* aux, bool safe) override;
+
+ bool EnableUnsafeTraps() override;
+
+ // Registry returns the trap registry used by Trap's SIGSYS handler,
+ // creating it if necessary.
+ static bpf_dsl::TrapRegistry* Registry();
+
+ // SandboxDebuggingAllowedByUser returns whether the
+ // "CHROME_SANDBOX_DEBUGGING" environment variable is set and non-empty.
+ static bool SandboxDebuggingAllowedByUser();
+
+ private:
+ struct TrapKey {
+ TrapKey() : fnc(NULL), aux(NULL), safe(false) {}
+ TrapKey(TrapFnc f, const void* a, bool s) : fnc(f), aux(a), safe(s) {}
+ TrapFnc fnc;
+ const void* aux;
+ bool safe;
+ bool operator<(const TrapKey&) const;
+ };
+ typedef std::map<TrapKey, uint16_t> TrapIds;
+
+ // Our constructor is private. A shared global instance is created
+ // automatically as needed.
+ Trap();
+
+ // The destructor is deleted, as destroying this object would
+ // break subsequent system calls that trigger a SIGSYS.
+ ~Trap() = delete;
+
+ static void SigSysAction(int nr, LinuxSigInfo* info, void* void_context);
+
+ // Make sure that SigSys is not inlined in order to get slightly better crash
+ // dumps.
+ void SigSys(int nr, LinuxSigInfo* info, ucontext_t* ctx)
+ __attribute__((noinline));
+ // We have a global singleton that handles all of our SIGSYS traps. This
+ // variable must never be deallocated after it has been set up initially, as
+ // there is no way to reset in-kernel BPF filters that generate SIGSYS
+ // events.
+ static Trap* global_trap_;
+
+ TrapIds trap_ids_; // Maps from TrapKeys to numeric ids
+ TrapKey* trap_array_; // Array of TrapKeys; id N is stored at index N - 1
+ size_t trap_array_size_; // Currently used size of array
+ size_t trap_array_capacity_; // Currently allocated capacity of array
+ bool has_unsafe_traps_; // Whether unsafe traps have been enabled
+
+ // Copying and assigning is unimplemented. It doesn't make sense for a
+ // singleton.
+ DISALLOW_COPY_AND_ASSIGN(Trap);
+};
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SECCOMP_BPF_TRAP_H__
diff --git a/sandbox/linux/seccomp-bpf/trap_unittest.cc b/sandbox/linux/seccomp-bpf/trap_unittest.cc
new file mode 100644
index 0000000000..99f94bfb3a
--- /dev/null
+++ b/sandbox/linux/seccomp-bpf/trap_unittest.cc
@@ -0,0 +1,28 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/seccomp-bpf/trap.h"
+
+#include <signal.h>
+
+#include "sandbox/linux/tests/unit_tests.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace sandbox {
+namespace {
+
+SANDBOX_TEST_ALLOW_NOISE(Trap, SigSysAction) {
+ // This creates a global Trap instance, and registers the signal handler
+ // (Trap::SigSysAction).
+ Trap::Registry();
+
+ // Send SIGSYS to self. If the signal handler (SigSysAction) is not
+ // registered, the process will be terminated with status code -SIGSYS.
+ // Note that the SigSysAction handler will output the error message
+ // "Unexpected SIGSYS received.", so it is necessary to allow the noise.
+ raise(SIGSYS);
+}
+
+} // namespace
+} // namespace sandbox
diff --git a/sandbox/linux/services/credentials.cc b/sandbox/linux/services/credentials.cc
new file mode 100644
index 0000000000..c77fd9efde
--- /dev/null
+++ b/sandbox/linux/services/credentials.cc
@@ -0,0 +1,299 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/credentials.h"
+
+#include <errno.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "base/bind.h"
+#include "base/files/file_path.h"
+#include "base/files/file_util.h"
+#include "base/logging.h"
+#include "base/posix/eintr_wrapper.h"
+#include "base/process/launch.h"
+#include "base/template_util.h"
+#include "build/build_config.h"
+#include "sandbox/linux/services/namespace_utils.h"
+#include "sandbox/linux/services/proc_util.h"
+#include "sandbox/linux/services/syscall_wrappers.h"
+#include "sandbox/linux/services/thread_helpers.h"
+#include "sandbox/linux/system_headers/capability.h"
+#include "sandbox/linux/system_headers/linux_signal.h"
+#include "third_party/valgrind/valgrind.h"
+
+namespace sandbox {
+
+namespace {
+
+// Reports the value of valgrind.h's RUNNING_ON_VALGRIND as a bool.
+bool IsRunningOnValgrind() {
+  return RUNNING_ON_VALGRIND;
+}
+
+// Verifies that the real, effective and saved uid are all the same value, and
+// likewise for the three gids. On success returns true and stores that single
+// uid in |resuid| and that single gid in |resgid|; either pointer may be NULL
+// if the caller is not interested. Returns false, writing nothing, when the
+// uids or the gids differ among themselves.
+bool GetRESIds(uid_t* resuid, gid_t* resgid) {
+  uid_t real_uid, effective_uid, saved_uid;
+  gid_t real_gid, effective_gid, saved_gid;
+  PCHECK(sys_getresuid(&real_uid, &effective_uid, &saved_uid) == 0);
+  PCHECK(sys_getresgid(&real_gid, &effective_gid, &saved_gid) == 0);
+
+  const bool single_uid = real_uid == effective_uid && real_uid == saved_uid;
+  const bool single_gid = real_gid == effective_gid && real_gid == saved_gid;
+  if (!(single_uid && single_gid)) {
+    return false;
+  }
+
+  if (resuid) {
+    *resuid = effective_uid;
+  }
+  if (resgid) {
+    *resgid = effective_gid;
+  }
+  return true;
+}
+
+const int kExitSuccess = 0;  // Exit status the helper children report on success.
+
+int ChrootToSelfFdinfo(void*) {  // Handed to clone() as the child's entry point.
+ RAW_CHECK(sys_chroot("/proc/self/fdinfo/") == 0);
+
+ // CWD is essentially an implicit file descriptor, so be careful to not
+ // leave it behind.
+ RAW_CHECK(chdir("/") == 0);
+ _exit(kExitSuccess);  // Terminates here; there is no return to the caller.
+}
+
+// chroot() to an empty dir that is "safe". To be safe, it must not contain
+// any subdirectory (chroot-ing there would allow a chroot escape) and it must
+// be impossible to create an empty directory there.
+// We achieve this by doing the following:
+// 1. We create a new process sharing file system information.
+// 2. In the child, we chroot to /proc/self/fdinfo/
+// This is already "safe", since fdinfo/ does not contain another directory and
+// one cannot create another directory there.
+// 3. The process dies
+// After (3) happens, the directory is not available anymore in /proc.
+//
+// Returns true iff the helper child exited normally with kExitSuccess.
+bool ChrootToSafeEmptyDir() {
+  // We need to chroot to a fdinfo that is unique to a process and have that
+  // process die.
+  // 1. We don't want to simply fork() because duplicating the page tables is
+  //    slow with a big address space.
+  // 2. We do not use a regular thread (that would unshare CLONE_FILES) because
+  //    when we are in a PID namespace, we cannot easily get a handle to the
+  //    /proc/tid directory for the thread (since /proc may not be aware of the
+  //    PID namespace). With a process, we can just use /proc/self.
+  pid_t pid = -1;
+  // The child starts executing with its stack pointer at the end of this
+  // buffer, so keep the buffer 16-byte aligned: several ABIs require that of
+  // the stack pointer. This assumes PTHREAD_STACK_MIN is a multiple of 16, so
+  // that the end of the buffer is aligned as well.
+  alignas(16) char stack_buf[PTHREAD_STACK_MIN];
+#if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \
+    defined(ARCH_CPU_MIPS64_FAMILY) || defined(ARCH_CPU_MIPS_FAMILY)
+  // The stack grows downward.
+  void* stack = stack_buf + sizeof(stack_buf);
+#else
+#error "Unsupported architecture"
+#endif
+
+  // CLONE_FS makes the child share our file system information, so its
+  // chroot()/chdir() take effect for us too.
+  pid = clone(ChrootToSelfFdinfo, stack,
+              CLONE_VM | CLONE_VFORK | CLONE_FS | LINUX_SIGCHLD, nullptr,
+              nullptr, nullptr, nullptr);
+  PCHECK(pid != -1);
+
+  int status = -1;
+  PCHECK(HANDLE_EINTR(waitpid(pid, &status, 0)) == pid);
+
+  return WIFEXITED(status) && WEXITSTATUS(status) == kExitSuccess;
+}
+
+// CHECK() that an attempt to move to a new user namespace raised an expected
+// errno. |error| is the errno value captured right after the failed call.
+void CheckCloneNewUserErrno(int error) {
+  // EPERM can happen if already in a chroot. EUSERS if too many nested
+  // namespaces are used; since Linux 4.9 the kernel reports ENOSPC for the
+  // namespace limits instead. EINVAL for kernels that don't support the
+  // feature. Valgrind will ENOSYS unshare().
+  PCHECK(error == EPERM || error == EUSERS || error == ENOSPC ||
+         error == EINVAL || error == ENOSYS);
+}
+
+// Converts a Capability to the corresponding Linux CAP_XXX value.
+int CapabilityToKernelValue(Credentials::Capability cap) {
+ switch (cap) {
+ case Credentials::Capability::SYS_CHROOT:
+ return CAP_SYS_CHROOT;
+ case Credentials::Capability::SYS_ADMIN:
+ return CAP_SYS_ADMIN;
+ }
+
+ LOG(FATAL) << "Invalid Capability: " << static_cast<int>(cap);  // No case matched.
+ return 0;
+}
+
+} // namespace.
+
+// static
+// Clears every capability by installing an empty capability list, then
+// CHECKs that none is left. Returns false if the capabilities could not be
+// set.
+bool Credentials::DropAllCapabilities(int proc_fd) {
+  const bool dropped = SetCapabilities(proc_fd, std::vector<Capability>());
+  if (dropped) {
+    CHECK(!HasAnyCapability());
+  }
+  return dropped;
+}
+
+// static
+// Convenience overload: obtains the proc descriptor from ProcUtil::OpenProc()
+// and holds it only for the duration of the call.
+bool Credentials::DropAllCapabilities() {
+  base::ScopedFD scoped_proc(ProcUtil::OpenProc());
+  const int raw_proc_fd = scoped_proc.get();
+  return Credentials::DropAllCapabilities(raw_proc_fd);
+}
+
+// static
+// Installs an empty capability list via SetCapabilitiesOnCurrentThread().
+bool Credentials::DropAllCapabilitiesOnCurrentThread() {
+  const std::vector<Capability> no_capabilities;
+  return SetCapabilitiesOnCurrentThread(no_capabilities);
+}
+
+// static
+// Builds a capability set in which only the capabilities listed in |caps|
+// have their effective and permitted flags set, and passes it to
+// sys_capset(). Returns true iff sys_capset() returned 0.
+bool Credentials::SetCapabilitiesOnCurrentThread(
+    const std::vector<Capability>& caps) {
+  struct cap_hdr header = {};
+  header.version = _LINUX_CAPABILITY_VERSION_3;
+
+  // Zero-initialized: no capability flag is set until requested below.
+  struct cap_data sets[_LINUX_CAPABILITY_U32S_3] = {{}};
+  for (const Capability requested : caps) {
+    const int kernel_cap = CapabilityToKernelValue(requested);
+    const size_t slot = CAP_TO_INDEX(kernel_cap);
+    const uint32_t bit = CAP_TO_MASK(kernel_cap);
+    sets[slot].effective |= bit;
+    sets[slot].permitted |= bit;
+  }
+
+  return sys_capset(&header, sets) == 0;
+}
+
+// static
+// Checked variant of SetCapabilitiesOnCurrentThread(): outside of TSAN builds
+// it first CHECKs ThreadHelpers::IsSingleThreaded(proc_fd).
+bool Credentials::SetCapabilities(int proc_fd,
+                                  const std::vector<Capability>& caps) {
+  DCHECK_LE(0, proc_fd);
+
+#if !defined(THREAD_SANITIZER)
+  // With TSAN, accept to break the security model as it is a testing
+  // configuration.
+  const bool single_threaded = ThreadHelpers::IsSingleThreaded(proc_fd);
+  CHECK(single_threaded);
+#endif
+
+  const bool applied = SetCapabilitiesOnCurrentThread(caps);
+  return applied;
+}
+
+// Queries the capability sets with sys_capget() and returns true as soon as
+// any effective, permitted or inheritable word is non-zero.
+bool Credentials::HasAnyCapability() {
+  struct cap_hdr header = {};
+  header.version = _LINUX_CAPABILITY_VERSION_3;
+  struct cap_data sets[_LINUX_CAPABILITY_U32S_3] = {{}};
+
+  PCHECK(sys_capget(&header, sets) == 0);
+
+  for (const struct cap_data& word : sets) {
+    const bool any_flag = word.effective || word.permitted || word.inheritable;
+    if (any_flag) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Returns true if |cap| is present in the effective, permitted or inheritable
+// set reported by sys_capget().
+bool Credentials::HasCapability(Capability cap) {
+  struct cap_hdr header = {};
+  header.version = _LINUX_CAPABILITY_VERSION_3;
+  struct cap_data sets[_LINUX_CAPABILITY_U32S_3] = {{}};
+
+  PCHECK(sys_capget(&header, sets) == 0);
+
+  const int kernel_cap = CapabilityToKernelValue(cap);
+  const size_t slot = CAP_TO_INDEX(kernel_cap);
+  const uint32_t bit = CAP_TO_MASK(kernel_cap);
+
+  // Union of the three sets for the word that holds |cap|.
+  const auto held =
+      sets[slot].effective | sets[slot].permitted | sets[slot].inheritable;
+  return (held & bit) != 0;
+}
+
+// static
+// Probes CLONE_NEWUSER support by creating, and immediately reaping, a child
+// process in a new user namespace.
+bool Credentials::CanCreateProcessInNewUserNS() {
+  // Valgrind will let clone(2) pass-through, but doesn't support unshare(),
+  // so always consider UserNS unsupported there.
+  if (IsRunningOnValgrind()) {
+    return false;
+  }
+
+#if defined(THREAD_SANITIZER)
+  // With TSAN, processes will always have threads running and can never
+  // enter a new user namespace with MoveToNewUserNS().
+  return false;
+#endif
+
+  // This is roughly a fork().
+  const pid_t child = sys_clone(CLONE_NEWUSER | SIGCHLD, 0, 0, 0, 0);
+
+  // The parent process could have had threads. In the child, these threads
+  // have disappeared. Make sure to not do anything in the child, as this is a
+  // fragile execution environment.
+  if (child == 0) {
+    _exit(kExitSuccess);
+  }
+
+  if (child == -1) {
+    CheckCloneNewUserErrno(errno);
+    return false;
+  }
+
+  // Always reap the child.
+  int wait_status = -1;
+  PCHECK(HANDLE_EINTR(waitpid(child, &wait_status, 0)) == child);
+  CHECK(WIFEXITED(wait_status));
+  CHECK_EQ(kExitSuccess, WEXITSTATUS(wait_status));
+
+  // clone(2) succeeded, we can use CLONE_NEWUSER.
+  return true;
+}
+
+// Unshares into a new user namespace and then writes the gid and uid map
+// files with the ids the process had beforehand. Returns false, without
+// changing namespace, if the RES ids are not uniform or if unshare() fails
+// with one of the expected errnos.
+bool Credentials::MoveToNewUserNS() {
+  uid_t original_uid;
+  gid_t original_gid;
+  const bool ids_are_uniform = GetRESIds(&original_uid, &original_gid);
+  if (!ids_are_uniform) {
+    // If all the uids (or gids) are not equal to each other, the security
+    // model will most likely confuse the caller, abort.
+    DVLOG(1) << "uids or gids differ!";
+    return false;
+  }
+
+  if (sys_unshare(CLONE_NEWUSER) != 0) {
+    // Capture errno before logging has a chance to overwrite it.
+    const int unshare_errno = errno;
+    VLOG(1) << "Looks like unprivileged CLONE_NEWUSER may not be available "
+            << "on this kernel.";
+    CheckCloneNewUserErrno(unshare_errno);
+    return false;
+  }
+
+  if (NamespaceUtils::KernelSupportsDenySetgroups()) {
+    PCHECK(NamespaceUtils::DenySetgroups());
+  }
+
+  // The current {r,e,s}{u,g}id is now an overflow id (c.f.
+  // /proc/sys/kernel/overflowuid). Setup the uid and gid maps.
+  DCHECK(GetRESIds(NULL, NULL));
+  const char kGidMapFile[] = "/proc/self/gid_map";
+  const char kUidMapFile[] = "/proc/self/uid_map";
+  PCHECK(NamespaceUtils::WriteToIdMapFile(kGidMapFile, original_gid));
+  PCHECK(NamespaceUtils::WriteToIdMapFile(kUidMapFile, original_uid));
+  DCHECK(GetRESIds(NULL, NULL));
+  return true;
+}
+
+bool Credentials::DropFileSystemAccess(int proc_fd) {
+ CHECK_LE(0, proc_fd);
+
+ CHECK(ChrootToSafeEmptyDir());
+ CHECK(!base::DirectoryExists(base::FilePath("/proc")));  // /proc must be unreachable now.
+ CHECK(!ProcUtil::HasOpenDirectory(proc_fd));
+ // We never let this function fail: every problem above is a CHECK failure.
+ return true;
+}
+
+} // namespace sandbox.
diff --git a/sandbox/linux/services/credentials.h b/sandbox/linux/services/credentials.h
new file mode 100644
index 0000000000..0001dc7328
--- /dev/null
+++ b/sandbox/linux/services/credentials.h
@@ -0,0 +1,104 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SERVICES_CREDENTIALS_H_
+#define SANDBOX_LINUX_SERVICES_CREDENTIALS_H_
+
+#include "build/build_config.h"
+// Link errors are tedious to track, raise a compile-time error instead.
+#if defined(OS_ANDROID)
+#error "Android is not supported."
+#endif // defined(OS_ANDROID).
+
+#include <string>
+#include <vector>
+
+#include "base/compiler_specific.h"
+#include "base/macros.h"
+#include "base/memory/scoped_ptr.h"
+#include "sandbox/linux/system_headers/capability.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+
+// This class should be used to manipulate the current process' credentials.
+// It is currently a stub used to manipulate POSIX.1e capabilities as
+// implemented by the Linux kernel.
+class SANDBOX_EXPORT Credentials {
+ public:
+ // For brevity, we only expose enums for the subset of capabilities we use.
+ // This can be expanded as the need arises.
+ enum class Capability {
+ SYS_CHROOT,
+ SYS_ADMIN,
+ };
+
+ // Drop all capabilities in the effective, inheritable and permitted sets for
+ // the current thread. For security reasons, since capabilities are
+ // per-thread, the caller is responsible for ensuring it is single-threaded
+ // when calling this API.
+ // |proc_fd| must be a file descriptor to /proc/ and remains owned by
+ // the caller.
+ static bool DropAllCapabilities(int proc_fd) WARN_UNUSED_RESULT;
+ // A similar API which assumes that it can open /proc/self/ by itself.
+ static bool DropAllCapabilities() WARN_UNUSED_RESULT;
+ // Sets the effective and permitted capability sets for the current thread to
+ // the list of capabilities in |caps|. All other capability flags are cleared.
+ static bool SetCapabilities(int proc_fd,
+ const std::vector<Capability>& caps)
+ WARN_UNUSED_RESULT;
+
+ // Versions of the above functions which do not check that the process is
+ // single-threaded. After calling these functions, capabilities of other
+ // threads will not be changed. This is dangerous, do not use unless you know
+ // what you are doing.
+ static bool DropAllCapabilitiesOnCurrentThread() WARN_UNUSED_RESULT;
+ static bool SetCapabilitiesOnCurrentThread(
+ const std::vector<Capability>& caps) WARN_UNUSED_RESULT;
+
+ // Returns true if the current thread has either the effective, permitted, or
+ // inheritable flag set for the given capability.
+ static bool HasCapability(Capability cap);
+
+ // Return true iff there is any capability in any of the capabilities sets
+ // of the current thread.
+ static bool HasAnyCapability();
+
+ // Returns whether the kernel supports CLONE_NEWUSER and whether it would be
+ // possible to immediately move to a new user namespace. There is no point
+ // in using this method right before calling MoveToNewUserNS(), simply call
+ // MoveToNewUserNS() immediately. This method is only useful to test the
+ // ability to move to a user namespace ahead of time.
+ static bool CanCreateProcessInNewUserNS();
+
+ // Move the current process to a new "user namespace" as supported by Linux
+ // 3.8+ (CLONE_NEWUSER).
+ // The uid map will be set-up so that the perceived uid and gid will not
+ // change.
+ // If this call succeeds, the current process will be granted a full set of
+ // capabilities in the new namespace.
+ // This will fail if the process is not mono-threaded.
+ static bool MoveToNewUserNS() WARN_UNUSED_RESULT;
+
+ // Remove the ability of the process to access the file system. File
+ // descriptors which are already open prior to calling this API remain
+ // available.
+ // The implementation currently uses chroot(2) and requires CAP_SYS_CHROOT.
+ // CAP_SYS_CHROOT can be acquired by using the MoveToNewUserNS() API.
+ // |proc_fd| must be a file descriptor to /proc/ and must be the only open
+ // directory file descriptor of the process.
+ //
+ // CRITICAL:
+ // - the caller must close |proc_fd| eventually or access to the file
+ // system can be recovered.
+ // - DropAllCapabilities() must be called to prevent escapes.
+ static bool DropFileSystemAccess(int proc_fd) WARN_UNUSED_RESULT;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Credentials);
+};
+
+} // namespace sandbox.
+
+#endif // SANDBOX_LINUX_SERVICES_CREDENTIALS_H_
diff --git a/sandbox/linux/services/credentials_unittest.cc b/sandbox/linux/services/credentials_unittest.cc
new file mode 100644
index 0000000000..6b93c86c3e
--- /dev/null
+++ b/sandbox/linux/services/credentials_unittest.cc
@@ -0,0 +1,242 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/credentials.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/capability.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <vector>
+
+#include "base/files/file_path.h"
+#include "base/files/file_util.h"
+#include "base/files/scoped_file.h"
+#include "base/logging.h"
+#include "base/memory/scoped_ptr.h"
+#include "sandbox/linux/services/proc_util.h"
+#include "sandbox/linux/services/syscall_wrappers.h"
+#include "sandbox/linux/system_headers/capability.h"
+#include "sandbox/linux/tests/unit_tests.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace sandbox {
+
+namespace {
+
+struct CapFreeDeleter {
+  void operator()(cap_t cap) const {  // A failing cap_free() is fatal.
+    const int free_result = cap_free(cap);
+    CHECK_EQ(0, free_result);
+  }
+};
+
+// Owning wrapper for libcap2's cap_t handles; released through cap_free().
+using ScopedCap = scoped_ptr<typeof(*static_cast<cap_t>(nullptr)), CapFreeDeleter>;
+
+bool WorkingDirectoryIsRoot() {
+  char path_buffer[PATH_MAX];
+  const char* cwd = getcwd(path_buffer, sizeof(path_buffer));
+  PCHECK(cwd);
+  if (strcmp("/", cwd) != 0) return false;
+
+  // The path says we are at the root. Be paranoid: "." and ".." must match.
+  struct stat self_stat;
+  CHECK_EQ(0, stat(".", &self_stat));
+  struct stat parent_stat;
+  CHECK_EQ(0, stat("..", &parent_stat));
+  CHECK_EQ(self_stat.st_dev, parent_stat.st_dev);
+  CHECK_EQ(self_stat.st_ino, parent_stat.st_ino);
+  CHECK_EQ(self_stat.st_mode, parent_stat.st_mode);
+  CHECK_EQ(self_stat.st_uid, parent_stat.st_uid);
+  CHECK_EQ(self_stat.st_gid, parent_stat.st_gid);
+  return true;
+}
+
+SANDBOX_TEST(Credentials, DropAllCaps) {
+ CHECK(Credentials::DropAllCapabilities());
+ CHECK(!Credentials::HasAnyCapability());  // Nothing may be left afterwards.
+}
+
+SANDBOX_TEST(Credentials, MoveToNewUserNS) {
+ CHECK(Credentials::DropAllCapabilities());
+ bool moved_to_new_ns = Credentials::MoveToNewUserNS();
+ fprintf(stdout,
+ "Unprivileged CLONE_NEWUSER supported: %s\n",
+ moved_to_new_ns ? "true." : "false.");
+ fflush(stdout);
+ if (!moved_to_new_ns) {
+ fprintf(stdout, "This kernel does not support unprivileged namespaces. "
+ "USERNS tests will succeed without running.\n");
+ fflush(stdout);
+ return;  // Not a failure: there is nothing left to verify.
+ }
+ CHECK(Credentials::HasAnyCapability());  // Granted by the new namespace.
+ CHECK(Credentials::DropAllCapabilities());
+ CHECK(!Credentials::HasAnyCapability());
+}
+
+SANDBOX_TEST(Credentials, CanCreateProcessInNewUserNS) {
+ CHECK(Credentials::DropAllCapabilities());
+ bool user_ns_supported = Credentials::CanCreateProcessInNewUserNS();
+ bool moved_to_new_ns = Credentials::MoveToNewUserNS();
+ CHECK_EQ(user_ns_supported, moved_to_new_ns);  // Probe and move must agree.
+}
+
+SANDBOX_TEST(Credentials, UidIsPreserved) {
+ CHECK(Credentials::DropAllCapabilities());
+ uid_t old_ruid, old_euid, old_suid;
+ gid_t old_rgid, old_egid, old_sgid;
+ PCHECK(0 == getresuid(&old_ruid, &old_euid, &old_suid));
+ PCHECK(0 == getresgid(&old_rgid, &old_egid, &old_sgid));
+ // The move failed, probably for lack of kernel support: nothing to compare.
+ if (!Credentials::MoveToNewUserNS()) return;
+ uid_t new_ruid, new_euid, new_suid;  // Real, effective and saved ids.
+ PCHECK(0 == getresuid(&new_ruid, &new_euid, &new_suid));
+ CHECK(old_ruid == new_ruid);
+ CHECK(old_euid == new_euid);
+ CHECK(old_suid == new_suid);
+
+ gid_t new_rgid, new_egid, new_sgid;
+ PCHECK(0 == getresgid(&new_rgid, &new_egid, &new_sgid));
+ CHECK(old_rgid == new_rgid);
+ CHECK(old_egid == new_egid);
+ CHECK(old_sgid == new_sgid);
+}
+
+bool NewUserNSCycle() {
+  // One full cycle: enter a fresh user namespace, confirm that capabilities
+  // were gained there, drop them all and confirm that none is left. The &&
+  // chain short-circuits, so the first failing step ends the cycle.
+  return Credentials::MoveToNewUserNS() &&
+         Credentials::HasAnyCapability() &&
+         Credentials::DropAllCapabilities() &&
+         !Credentials::HasAnyCapability();
+}
+
+SANDBOX_TEST(Credentials, NestedUserNS) {
+ CHECK(Credentials::DropAllCapabilities());
+ // The move failed, probably for lack of kernel support: skip the test.
+ if (!Credentials::MoveToNewUserNS()) return;
+ CHECK(Credentials::DropAllCapabilities());
+ // As of 3.12, the kernel caps nesting at 32 (see create_user_ns()).
+ const int kNestLevel = 10;  // Comfortably below that limit.
+ for (int i = 0; i < kNestLevel; ++i) {
+ CHECK(NewUserNSCycle()) << "Creating new user NS failed at iteration "
+ << i << ".";
+ }
+}
+
+// Test WorkingDirectoryIsRoot() from a non-root directory and then from "/".
+SANDBOX_TEST(Credentials, CanDetectRoot) {
+ PCHECK(0 == chdir("/proc/"));
+ CHECK(!WorkingDirectoryIsRoot());
+ PCHECK(0 == chdir("/"));
+ CHECK(WorkingDirectoryIsRoot());
+}
+
+// Disabled on ASAN because of crbug.com/451603.
+SANDBOX_TEST(Credentials, DISABLE_ON_ASAN(DropFileSystemAccessIsSafe)) {
+ CHECK(Credentials::DropAllCapabilities());
+ // The move failed, probably for lack of kernel support: skip the test.
+ if (!Credentials::MoveToNewUserNS()) return;
+ CHECK(Credentials::DropFileSystemAccess(ProcUtil::OpenProc().get()));
+ CHECK(!base::DirectoryExists(base::FilePath("/proc")));  // Old tree is gone.
+ CHECK(WorkingDirectoryIsRoot());
+ CHECK(base::IsDirectoryEmpty(base::FilePath("/")));
+ // We want the chroot to never have a subdirectory. A subdirectory
+ // could allow a chroot escape.
+ CHECK_NE(0, mkdir("/test", 0700));  // Creating one must therefore fail.
+}
+
+// Check that after dropping filesystem access and dropping privileges
+// it is not possible to regain capabilities.
+SANDBOX_TEST(Credentials, DISABLE_ON_ASAN(CannotRegainPrivileges)) {
+ base::ScopedFD proc_fd(ProcUtil::OpenProc());  // Opened while /proc exists.
+ CHECK(Credentials::DropAllCapabilities(proc_fd.get()));
+ // The move failed, probably for lack of kernel support: skip the test.
+ if (!Credentials::MoveToNewUserNS()) return;
+ CHECK(Credentials::DropFileSystemAccess(proc_fd.get()));
+ CHECK(Credentials::DropAllCapabilities(proc_fd.get()));
+
+ // The kernel should now prevent us from regaining capabilities because we
+ // are in a chroot.
+ CHECK(!Credentials::CanCreateProcessInNewUserNS());
+ CHECK(!Credentials::MoveToNewUserNS());
+}
+
+SANDBOX_TEST(Credentials, SetCapabilities) {
+ // The move failed, probably for lack of kernel support: skip the test.
+ if (!Credentials::MoveToNewUserNS())
+ return;
+
+ base::ScopedFD proc_fd(ProcUtil::OpenProc());
+
+ CHECK(Credentials::HasCapability(Credentials::Capability::SYS_ADMIN));
+ CHECK(Credentials::HasCapability(Credentials::Capability::SYS_CHROOT));
+
+ std::vector<Credentials::Capability> caps;
+ caps.push_back(Credentials::Capability::SYS_CHROOT);
+ CHECK(Credentials::SetCapabilities(proc_fd.get(), caps));  // Only SYS_CHROOT.
+
+ CHECK(!Credentials::HasCapability(Credentials::Capability::SYS_ADMIN));
+ CHECK(Credentials::HasCapability(Credentials::Capability::SYS_CHROOT));
+
+ const std::vector<Credentials::Capability> no_caps;
+ CHECK(Credentials::SetCapabilities(proc_fd.get(), no_caps));  // Empty set.
+ CHECK(!Credentials::HasAnyCapability());
+}
+
+SANDBOX_TEST(Credentials, SetCapabilitiesAndChroot) {
+ // The move failed, probably for lack of kernel support: skip the test.
+ if (!Credentials::MoveToNewUserNS())
+ return;
+
+ base::ScopedFD proc_fd(ProcUtil::OpenProc());
+
+ CHECK(Credentials::HasCapability(Credentials::Capability::SYS_CHROOT));
+ PCHECK(chroot("/") == 0);  // Allowed with the full capability set.
+
+ std::vector<Credentials::Capability> caps;
+ caps.push_back(Credentials::Capability::SYS_CHROOT);
+ CHECK(Credentials::SetCapabilities(proc_fd.get(), caps));
+ PCHECK(chroot("/") == 0);  // Still allowed with SYS_CHROOT alone.
+
+ CHECK(Credentials::DropAllCapabilities());
+ PCHECK(chroot("/") == -1 && errno == EPERM);  // Refused once it is dropped.
+}
+
+SANDBOX_TEST(Credentials, SetCapabilitiesMatchesLibCap2) {
+ // The move failed, probably for lack of kernel support: skip the test.
+ if (!Credentials::MoveToNewUserNS())
+ return;
+
+ base::ScopedFD proc_fd(ProcUtil::OpenProc());
+
+ std::vector<Credentials::Capability> caps;
+ caps.push_back(Credentials::Capability::SYS_CHROOT);
+ CHECK(Credentials::SetCapabilities(proc_fd.get(), caps));
+
+ ScopedCap actual_cap(cap_get_proc());  // What libcap2 reports for us.
+ PCHECK(actual_cap != nullptr);
+
+ ScopedCap expected_cap(cap_init());  // Built by hand for comparison.
+ PCHECK(expected_cap != nullptr);
+
+ const cap_value_t allowed_cap = CAP_SYS_CHROOT;
+ for (const cap_flag_t flag : {CAP_EFFECTIVE, CAP_PERMITTED}) {
+ PCHECK(cap_set_flag(expected_cap.get(), flag, 1, &allowed_cap, CAP_SET) ==
+ 0);
+ }
+
+ CHECK_EQ(0, cap_compare(expected_cap.get(), actual_cap.get()));  // Equal.
+}
+
+} // namespace.
+
+} // namespace sandbox.
diff --git a/sandbox/linux/services/init_process_reaper.cc b/sandbox/linux/services/init_process_reaper.cc
new file mode 100644
index 0000000000..2e0b90b7b5
--- /dev/null
+++ b/sandbox/linux/services/init_process_reaper.cc
@@ -0,0 +1,101 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/init_process_reaper.h"
+
+#include <signal.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "base/callback.h"
+#include "base/logging.h"
+#include "base/posix/eintr_wrapper.h"
+
+namespace sandbox {
+
+namespace {
+
+void DoNothingSignalHandler(int signal) {}  // Empty on purpose.
+
+} // namespace
+
+bool CreateInitProcessReaper(base::Closure* post_fork_parent_callback) {
+ int sync_fds[2];
+ // We want to use send(), so we need a socketpair rather than a pipe.
+ if (socketpair(AF_UNIX, SOCK_STREAM, 0, sync_fds)) {
+ PLOG(ERROR) << "Failed to create socketpair";
+ return false;
+ }
+ pid_t child_pid = fork();
+ if (child_pid == -1) {  // fork() failed: release both ends and report it.
+ int close_ret;
+ close_ret = IGNORE_EINTR(close(sync_fds[0]));
+ DPCHECK(!close_ret);
+ close_ret = IGNORE_EINTR(close(sync_fds[1]));
+ DPCHECK(!close_ret);
+ return false;
+ }
+ if (child_pid) {
+ // In the parent, assuming the role of an init process.
+ // The disposition for SIGCHLD cannot be SIG_IGN or wait() will only return
+ // once all of our children are dead. Since we're init we need to reap
+ // children as they come.
+ struct sigaction action;
+ memset(&action, 0, sizeof(action));
+ action.sa_handler = &DoNothingSignalHandler;
+ CHECK(sigaction(SIGCHLD, &action, NULL) == 0);
+
+ int close_ret;
+ close_ret = IGNORE_EINTR(close(sync_fds[0]));  // The child's end.
+ DPCHECK(!close_ret);
+ close_ret = shutdown(sync_fds[1], SHUT_RD);  // The parent only sends.
+ DPCHECK(!close_ret);
+ if (post_fork_parent_callback)
+ post_fork_parent_callback->Run();
+ // Tell the child to continue, now that the callback has run.
+ CHECK(HANDLE_EINTR(send(sync_fds[1], "C", 1, MSG_NOSIGNAL)) == 1);
+ close_ret = IGNORE_EINTR(close(sync_fds[1]));
+ DPCHECK(!close_ret);
+
+ for (;;) {
+ // Loop until we have reaped our one natural child
+ siginfo_t reaped_child_info;
+ int wait_ret =
+ HANDLE_EINTR(waitid(P_ALL, 0, &reaped_child_info, WEXITED));
+ if (wait_ret)
+ _exit(1);  // waitid() itself failed.
+ if (reaped_child_info.si_pid == child_pid) {
+ int exit_code = 0;
+ // We're done waiting
+ if (reaped_child_info.si_code == CLD_EXITED) {
+ exit_code = reaped_child_info.si_status;
+ }
+ // Exit with the same exit code as our child. Exit with 0 if the child
+ // did not exit normally (e.g. it was terminated by a signal).
+ _exit(exit_code);
+ }
+ }
+ } else {
+ // The child needs to wait for the parent to run the callback to avoid a
+ // race condition.
+ int close_ret;
+ close_ret = IGNORE_EINTR(close(sync_fds[1]));  // The parent's end.
+ DPCHECK(!close_ret);
+ close_ret = shutdown(sync_fds[0], SHUT_WR);  // The child only reads.
+ DPCHECK(!close_ret);
+ char should_continue;
+ int read_ret = HANDLE_EINTR(read(sync_fds[0], &should_continue, 1));
+ close_ret = IGNORE_EINTR(close(sync_fds[0]));
+ DPCHECK(!close_ret);
+ if (read_ret == 1)  // Exactly the one byte sent by the parent arrived.
+ return true;
+ else
+ return false;  // End of stream or read error.
+ }
+}
+
+} // namespace sandbox.
diff --git a/sandbox/linux/services/init_process_reaper.h b/sandbox/linux/services/init_process_reaper.h
new file mode 100644
index 0000000000..840f6fcda7
--- /dev/null
+++ b/sandbox/linux/services/init_process_reaper.h
@@ -0,0 +1,25 @@
+// Copyright 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SERVICES_INIT_PROCESS_REAPER_H_
+#define SANDBOX_LINUX_SERVICES_INIT_PROCESS_REAPER_H_
+
+#include "base/callback_forward.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+
+// The current process will fork(). The parent becomes a process reaper like
+// init(1) and never returns from this function. The child continues normally:
+// it gets true once the parent has told it to proceed, false otherwise.
+// False is also returned, without any child, if socketpair() or fork() fails.
+// If not NULL, |post_fork_parent_callback| runs in the parent right after
+// fork(), before the child is allowed to continue. Since this function calls
+// fork(), it's very important that the caller has only one thread running.
+SANDBOX_EXPORT bool CreateInitProcessReaper(
+ base::Closure* post_fork_parent_callback);
+
+} // namespace sandbox.
+
+#endif // SANDBOX_LINUX_SERVICES_INIT_PROCESS_REAPER_H_
diff --git a/sandbox/linux/services/namespace_sandbox.cc b/sandbox/linux/services/namespace_sandbox.cc
new file mode 100644
index 0000000000..23796446f3
--- /dev/null
+++ b/sandbox/linux/services/namespace_sandbox.cc
@@ -0,0 +1,208 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/namespace_sandbox.h"
+
+#include <sched.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "base/command_line.h"
+#include "base/environment.h"
+#include "base/files/scoped_file.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/posix/eintr_wrapper.h"
+#include "base/process/launch.h"
+#include "base/process/process.h"
+#include "sandbox/linux/services/credentials.h"
+#include "sandbox/linux/services/namespace_utils.h"
+
+namespace sandbox {
+
+namespace {
+
+const char kSandboxUSERNSEnvironmentVarName[] = "SBX_USER_NS";  // CLONE_NEWUSER
+const char kSandboxPIDNSEnvironmentVarName[] = "SBX_PID_NS";  // CLONE_NEWPID
+const char kSandboxNETNSEnvironmentVarName[] = "SBX_NET_NS";  // CLONE_NEWNET
+
+#if !defined(OS_NACL_NONSFI)
+class WriteUidGidMapDelegate : public base::LaunchOptions::PreExecDelegate {
+ public:
+  // Everything is captured up front; RunAsyncSafe() only performs the writes.
+  WriteUidGidMapDelegate()
+      : uid_to_map_(getuid()),
+        gid_to_map_(getgid()),
+        deny_setgroups_first_(
+            NamespaceUtils::KernelSupportsDenySetgroups()) {}
+  ~WriteUidGidMapDelegate() override {}
+
+  // Maps the captured ids onto themselves; any failure is fatal.
+  void RunAsyncSafe() override {
+    if (deny_setgroups_first_)
+      RAW_CHECK(NamespaceUtils::DenySetgroups());
+    RAW_CHECK(NamespaceUtils::WriteToIdMapFile("/proc/self/uid_map", uid_to_map_));
+    RAW_CHECK(NamespaceUtils::WriteToIdMapFile("/proc/self/gid_map", gid_to_map_));
+  }
+
+ private:
+  const uid_t uid_to_map_;
+  const gid_t gid_to_map_;
+  const bool deny_setgroups_first_;
+  DISALLOW_COPY_AND_ASSIGN(WriteUidGidMapDelegate);
+};
+
+void SetEnvironForNamespaceType(base::EnvironmentMap* environ,
+                                base::NativeEnvironmentString env_var,
+                                bool value) {
+  base::NativeEnvironmentString& slot = (*environ)[env_var];
+  slot = value ? "1" : "";  // Empty string: the var is unset in the child.
+}
+
+// Indexed by signal number. Those run from 1 to 64 here, hence 65 slots.
+int g_signal_exit_codes[65];
+
+void TerminationSignalHandler(int sig) {
+  // Exit with the code registered for |sig|, so that the process is detected
+  // as terminated by a signal. The cast to size_t turns a negative |sig| into
+  // a huge index, so a single bounds check rejects both ends; signals outside
+  // of the table fall back to the default exit code.
+  const size_t slot = static_cast<size_t>(sig);
+  if (slot >= arraysize(g_signal_exit_codes))
+    _exit(NamespaceSandbox::kDefaultExitCode);
+  _exit(g_signal_exit_codes[slot]);
+}
+#endif // !defined(OS_NACL_NONSFI)
+
+} // namespace
+
+#if !defined(OS_NACL_NONSFI)
+// static. Convenience overload that forwards the argv of |cmdline|.
+base::Process NamespaceSandbox::LaunchProcess(
+ const base::CommandLine& cmdline,
+ const base::LaunchOptions& options) {
+ return LaunchProcess(cmdline.argv(), options);
+}
+
+// static
+base::Process NamespaceSandbox::LaunchProcess(
+    const std::vector<std::string>& argv,
+    const base::LaunchOptions& options) {
+  // Ask for every namespace type that the kernel lets unprivileged users
+  // create; the user namespace itself is mandatory.
+  const int kCandidateTypes[] = {CLONE_NEWUSER, CLONE_NEWPID, CLONE_NEWNET};
+  int clone_flags = 0;
+  for (const int candidate : kCandidateTypes) {
+    if (NamespaceUtils::KernelSupportsUnprivilegedNamespace(candidate))
+      clone_flags |= candidate;
+  }
+  CHECK(clone_flags & CLONE_NEWUSER);
+
+  // These fields may not be set by the caller: both are overridden below.
+  CHECK(options.pre_exec_delegate == nullptr);
+  CHECK_EQ(0, options.clone_flags);
+
+  // The delegate is passed by pointer, so it has to stay alive until the
+  // base::LaunchProcess() call at the end has returned.
+  WriteUidGidMapDelegate uid_gid_mapper;
+  base::LaunchOptions sandboxed_options = options;
+  sandboxed_options.pre_exec_delegate = &uid_gid_mapper;
+  sandboxed_options.clone_flags = clone_flags;
+
+  // Tell the child, through its environment, which namespaces it runs in.
+  const std::pair<int, const char*> kFlagToEnvVar[] = {
+      std::make_pair(CLONE_NEWUSER, kSandboxUSERNSEnvironmentVarName),
+      std::make_pair(CLONE_NEWPID, kSandboxPIDNSEnvironmentVarName),
+      std::make_pair(CLONE_NEWNET, kSandboxNETNSEnvironmentVarName),
+  };
+  for (const auto& mapping : kFlagToEnvVar) {
+    SetEnvironForNamespaceType(&sandboxed_options.environ, mapping.second,
+                               clone_flags & mapping.first);
+  }
+
+  return base::LaunchProcess(argv, sandboxed_options);
+}
+
+// static
+pid_t NamespaceSandbox::ForkInNewPidNamespace(bool drop_capabilities_in_child) {
+  const pid_t fork_result =
+      base::ForkWithFlags(CLONE_NEWPID | SIGCHLD, nullptr, nullptr);
+
+  // A negative value (failure) and a positive one (we are the parent, holding
+  // the child's pid) are both handed straight back to the caller.
+  if (fork_result != 0)
+    return fork_result;
+
+  // Only the child gets here. It is expected to be the first process of the
+  // new PID namespace, i.e. pid 1.
+  DCHECK_EQ(1, getpid());
+  if (drop_capabilities_in_child) {
+    // Since we just forked, we are single-threaded, so this should be safe.
+    CHECK(Credentials::DropAllCapabilitiesOnCurrentThread());
+  }
+  return 0;
+}
+
+// static
+void NamespaceSandbox::InstallDefaultTerminationSignalHandlers() {
+  static const int kDefaultTermSignals[] = {
+      SIGHUP, SIGINT, SIGABRT, SIGQUIT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2,
+  };
+  // The result is ignored on purpose: a signal that already has a handler
+  // simply keeps it.
+  for (size_t i = 0; i < arraysize(kDefaultTermSignals); ++i)
+    InstallTerminationSignalHandler(kDefaultTermSignals[i], kDefaultExitCode);
+}
+
+// static
+bool NamespaceSandbox::InstallTerminationSignalHandler(
+    int sig,
+    int exit_code) {
+  struct sigaction current;
+  PCHECK(sigaction(sig, nullptr, &current) == 0);
+
+  // Leave |sig| alone when something is already installed: either an
+  // SA_SIGINFO-style callback or any disposition other than the default.
+  const bool has_siginfo_callback =
+      (current.sa_flags & SA_SIGINFO) && current.sa_sigaction != nullptr;
+  if (has_siginfo_callback || current.sa_handler != SIG_DFL)
+    return false;
+
+  const size_t slot = static_cast<size_t>(sig);
+  CHECK_LT(slot, arraysize(g_signal_exit_codes));
+
+  DCHECK_GE(exit_code, 0);
+  DCHECK_LT(exit_code, 256);
+
+  // Record the exit code before arming the handler that reads it.
+  g_signal_exit_codes[slot] = exit_code;
+  struct sigaction replacement = {};
+  replacement.sa_handler = &TerminationSignalHandler;
+  PCHECK(sigaction(sig, &replacement, nullptr) == 0);
+  return true;
+}
+#endif // !defined(OS_NACL_NONSFI)
+
+// static. True iff SBX_USER_NS is present in the environment.
+bool NamespaceSandbox::InNewUserNamespace() {
+ return getenv(kSandboxUSERNSEnvironmentVarName) != nullptr;
+}
+
+// static. True iff SBX_PID_NS is present in the environment.
+bool NamespaceSandbox::InNewPidNamespace() {
+ return getenv(kSandboxPIDNSEnvironmentVarName) != nullptr;
+}
+
+// static. True iff SBX_NET_NS is present in the environment.
+bool NamespaceSandbox::InNewNetNamespace() {
+ return getenv(kSandboxNETNSEnvironmentVarName) != nullptr;
+}
+
+} // namespace sandbox
diff --git a/sandbox/linux/services/namespace_sandbox.h b/sandbox/linux/services/namespace_sandbox.h
new file mode 100644
index 0000000000..80097fb16a
--- /dev/null
+++ b/sandbox/linux/services/namespace_sandbox.h
@@ -0,0 +1,101 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SERVICES_NAMESPACE_SANDBOX_H_
+#define SANDBOX_LINUX_SERVICES_NAMESPACE_SANDBOX_H_
+
+#include <sys/types.h>
+
+#include <string>
+#include <vector>
+
+#include "base/command_line.h"
+#include "base/macros.h"
+#include "base/process/launch.h"
+#include "base/process/process.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+
+// Helper class for starting a process inside a new user, PID, and network
+// namespace. Before using a namespace sandbox, check for namespaces support
+// using Credentials::CanCreateProcessInNewUserNS.
+//
+// A typical use for "A" launching a sandboxed process "B" would be:
+// 1. A sets up a command line and launch options for process B.
+// 2. A launches B with LaunchProcess.
+// 3. B should be prepared to assume the role of init(1). In particular, apart
+// from SIGKILL and SIGSTOP, B cannot receive any signal for which it does
+// not have an explicit signal handler registered.
+// If B dies, all the processes in the namespace will die.
+// B can fork() and the parent can assume the role of init(1), by using
+// CreateInitProcessReaper().
+// 4. B chroots using Credentials::MoveToNewUserNS() and
+// Credentials::DropFileSystemAccess()
+// 5. B drops capabilities gained by entering the new user namespace with
+// Credentials::DropAllCapabilities().
+class SANDBOX_EXPORT NamespaceSandbox {
+ public:
+#if !defined(OS_NACL_NONSFI)
+ static const int kDefaultExitCode = 1;
+
+ // Launch a new process inside its own user/PID/network namespaces (depending
+ // on kernel support). Requires at a minimum that user namespaces are
+ // supported (use Credentials::CanCreateProcessInNewUserNS to check this).
+ //
+ // pre_exec_delegate and clone_flags fields of LaunchOptions should be nullptr
+ // and 0, respectively, since this function makes a copy of options and
+ // overrides them.
+ static base::Process LaunchProcess(const base::CommandLine& cmdline,
+ const base::LaunchOptions& options);
+ static base::Process LaunchProcess(const std::vector<std::string>& argv,
+ const base::LaunchOptions& options);
+
+ // Forks a process in its own PID namespace. The child process is the init
+ // process inside of the PID namespace, so if the child needs to fork further,
+ // it should call CreateInitProcessReaper, which turns the init process into a
+ // reaper process.
+ //
+ // Otherwise, the child should setup handlers for signals which should
+ // terminate the process using InstallDefaultTerminationSignalHandlers or
+ // InstallTerminationSignalHandler. This works around the fact that init
+ // processes ignore such signals unless they have an explicit handler set.
+ //
+ // This function requires CAP_SYS_ADMIN. If |drop_capabilities_in_child| is
+ // true, then capabilities are dropped in the child. Returns like fork().
+ static pid_t ForkInNewPidNamespace(bool drop_capabilities_in_child);
+
+ // Installs a signal handler for:
+ //
+ // SIGHUP, SIGINT, SIGABRT, SIGQUIT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2
+ //
+ // that exits with kDefaultExitCode. These are signals whose default action is
+ // to terminate the program (apart from SIGILL, SIGFPE, and SIGSEGV, which
+ // will still terminate the process if e.g. an illegal instruction is
+ // encountered, etc.).
+ //
+ // If any of these already had a signal handler installed, this function will
+ // not override them.
+ static void InstallDefaultTerminationSignalHandlers();
+
+ // Installs a signal handler for |sig| which exits with |exit_code|. If a
+ // signal handler was already present for |sig|, does nothing and returns
+ // false.
+ static bool InstallTerminationSignalHandler(int sig, int exit_code);
+#endif // !defined(OS_NACL_NONSFI)
+
+ // Returns whether the namespace sandbox created a new user, PID, and network
+ // namespace. In particular, InNewUserNamespace should return true iff the
+ // process was started via this class.
+ static bool InNewUserNamespace();
+ static bool InNewPidNamespace();
+ static bool InNewNetNamespace();
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(NamespaceSandbox);
+};
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SERVICES_NAMESPACE_SANDBOX_H_
diff --git a/sandbox/linux/services/namespace_sandbox_unittest.cc b/sandbox/linux/services/namespace_sandbox_unittest.cc
new file mode 100644
index 0000000000..547ef6728c
--- /dev/null
+++ b/sandbox/linux/services/namespace_sandbox_unittest.cc
@@ -0,0 +1,217 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/namespace_sandbox.h"
+
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <string>
+#include <utility>
+
+#include "base/command_line.h"
+#include "base/files/file_enumerator.h"
+#include "base/files/file_path.h"
+#include "base/logging.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/process/launch.h"
+#include "base/process/process.h"
+#include "base/test/multiprocess_test.h"
+#include "sandbox/linux/services/credentials.h"
+#include "sandbox/linux/services/namespace_utils.h"
+#include "sandbox/linux/services/proc_util.h"
+#include "sandbox/linux/tests/unit_tests.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "testing/multiprocess_func_list.h"
+
+namespace sandbox {
+
+namespace {
+
+bool RootDirectoryIsEmpty() {
+  const base::FilePath root_dir("/");
+  const int entry_kinds =
+      base::FileEnumerator::FILES | base::FileEnumerator::DIRECTORIES;
+  base::FileEnumerator root_entries(root_dir, false, entry_kinds);
+  return root_entries.Next().empty();  // No first entry: "/" is empty.
+}
+
+class NamespaceSandboxTest : public base::MultiProcessTest {
+ public:
+ void TestProc(const std::string& procname) {  // Runs |procname| sandboxed.
+ if (!Credentials::CanCreateProcessInNewUserNS()) {
+ return;  // No user namespace support: nothing can be tested.
+ }
+
+ base::FileHandleMappingVector fds_to_remap = {
+ std::make_pair(STDOUT_FILENO, STDOUT_FILENO),
+ std::make_pair(STDERR_FILENO, STDERR_FILENO),
+ };
+ base::LaunchOptions launch_options;
+ launch_options.fds_to_remap = &fds_to_remap;
+
+ base::Process process =
+ NamespaceSandbox::LaunchProcess(MakeCmdLine(procname), launch_options);
+ ASSERT_TRUE(process.IsValid());
+
+ const int kDummyExitCode = 42;  // Non-zero, so an unset code is noticed.
+ int exit_code = kDummyExitCode;
+ EXPECT_TRUE(process.WaitForExit(&exit_code));
+ EXPECT_EQ(0, exit_code);
+ }
+};
+
+MULTIPROCESS_TEST_MAIN(SimpleChildProcess) {
+  // The flags set by the launcher must match what the kernel supports.
+  const bool in_user_ns = NamespaceSandbox::InNewUserNamespace();
+  const bool in_pid_ns = NamespaceSandbox::InNewPidNamespace();
+  const bool in_net_ns = NamespaceSandbox::InNewNetNamespace();
+  CHECK(in_user_ns);
+  CHECK_EQ(in_pid_ns,
+           NamespaceUtils::KernelSupportsUnprivilegedNamespace(CLONE_NEWPID));
+  CHECK_EQ(in_net_ns,
+           NamespaceUtils::KernelSupportsUnprivilegedNamespace(CLONE_NEWNET));
+  if (in_pid_ns) {
+    CHECK_EQ(1, getpid());
+  }
+  return 0;
+}
+
+TEST_F(NamespaceSandboxTest, BasicUsage) {
+ TestProc("SimpleChildProcess");  // The checks run in the launched child.
+}
+
+MULTIPROCESS_TEST_MAIN(ChrootMe) {
+ CHECK(!RootDirectoryIsEmpty());  // Sanity check before dropping access.
+ CHECK(sandbox::Credentials::MoveToNewUserNS());
+ CHECK(sandbox::Credentials::DropFileSystemAccess(ProcUtil::OpenProc().get()));
+ CHECK(RootDirectoryIsEmpty());  // Nothing is reachable afterwards.
+ return 0;
+}
+
+// Temporarily disabled on ASAN due to crbug.com/451603.
+TEST_F(NamespaceSandboxTest, DISABLE_ON_ASAN(ChrootAndDropCapabilities)) {
+ TestProc("ChrootMe");  // The checks run in the launched child.
+}
+
+MULTIPROCESS_TEST_MAIN(NestedNamespaceSandbox) {
+ base::FileHandleMappingVector fds_to_remap = {
+ std::make_pair(STDOUT_FILENO, STDOUT_FILENO),
+ std::make_pair(STDERR_FILENO, STDERR_FILENO),
+ };
+ base::LaunchOptions launch_options;
+ launch_options.fds_to_remap = &fds_to_remap;
+ base::Process process = NamespaceSandbox::LaunchProcess(
+ base::CommandLine(base::FilePath("/bin/true")), launch_options);
+ CHECK(process.IsValid());  // A sandbox can be started from within a sandbox.
+
+ const int kDummyExitCode = 42;  // Non-zero, so an unset code is noticed.
+ int exit_code = kDummyExitCode;
+ CHECK(process.WaitForExit(&exit_code));
+ CHECK_EQ(0, exit_code);
+ return 0;
+}
+
+TEST_F(NamespaceSandboxTest, NestedNamespaceSandbox) {
+ TestProc("NestedNamespaceSandbox");  // The child launches a second sandbox.
+}
+
+const int kNormalExitCode = 0;  // The child finished on its own.
+const int kSignalTerminationExitCode = 255;  // Exit via the signal handler.
+
+// Ensure that CHECK(false) is distinguishable from _exit(kNormalExitCode).
+// Allowing noise since CHECK(false) will write a stack trace to stderr.
+SANDBOX_TEST_ALLOW_NOISE(ForkInNewPidNamespace, CheckDoesNotReturnZero) {
+ if (!Credentials::CanCreateProcessInNewUserNS()) {
+ return;
+ }
+
+ CHECK(sandbox::Credentials::MoveToNewUserNS());
+ const pid_t pid = NamespaceSandbox::ForkInNewPidNamespace(
+ /*drop_capabilities_in_child=*/true);
+ CHECK_GE(pid, 0);
+
+ if (pid == 0) {
+ CHECK(false);  // The child is expected to die right here...
+ _exit(kNormalExitCode);  // ...and never to reach this exit.
+ }
+
+ int status;
+ PCHECK(waitpid(pid, &status, 0) == pid);
+ if (WIFEXITED(status)) {  // Any other kind of termination is accepted too.
+ CHECK_NE(kNormalExitCode, WEXITSTATUS(status));
+ }
+}
+
+SANDBOX_TEST(ForkInNewPidNamespace, BasicUsage) {
+ if (!Credentials::CanCreateProcessInNewUserNS()) {
+ return;
+ }
+
+ CHECK(sandbox::Credentials::MoveToNewUserNS());
+ const pid_t pid = NamespaceSandbox::ForkInNewPidNamespace(
+ /*drop_capabilities_in_child=*/true);
+ CHECK_GE(pid, 0);
+
+ if (pid == 0) {
+ CHECK_EQ(1, getpid());  // First process of the new PID namespace.
+ CHECK(!Credentials::HasAnyCapability());  // Dropped, as requested.
+ _exit(kNormalExitCode);
+ }
+
+ int status;
+ PCHECK(waitpid(pid, &status, 0) == pid);
+ CHECK(WIFEXITED(status));
+ CHECK_EQ(kNormalExitCode, WEXITSTATUS(status));
+}
+
+SANDBOX_TEST(ForkInNewPidNamespace, ExitWithSignal) {
+ if (!Credentials::CanCreateProcessInNewUserNS()) {
+ return;
+ }
+
+ CHECK(sandbox::Credentials::MoveToNewUserNS());
+ const pid_t pid = NamespaceSandbox::ForkInNewPidNamespace(
+ /*drop_capabilities_in_child=*/true);
+ CHECK_GE(pid, 0);
+
+ if (pid == 0) {
+ CHECK_EQ(1, getpid());
+ CHECK(!Credentials::HasAnyCapability());
+ CHECK(NamespaceSandbox::InstallTerminationSignalHandler(
+ SIGTERM, kSignalTerminationExitCode));
+ while (true) {  // Only the installed handler can end this loop.
+ raise(SIGTERM);
+ }
+ }
+
+ int status;
+ PCHECK(waitpid(pid, &status, 0) == pid);
+ CHECK(WIFEXITED(status));  // The handler exits; the child is not killed.
+ CHECK_EQ(kSignalTerminationExitCode, WEXITSTATUS(status));
+}
+
+volatile sig_atomic_t signal_handler_called;  // Set by ExitSuccessfully().
+void ExitSuccessfully(int sig) {  // Despite its name, only records the call.
+ signal_handler_called = 1;
+}
+
+SANDBOX_TEST(InstallTerminationSignalHandler, DoesNotOverrideExistingHandlers) {
+ struct sigaction action = {};
+ action.sa_handler = &ExitSuccessfully;
+ PCHECK(sigaction(SIGUSR1, &action, nullptr) == 0);  // Our own handler first.
+
+ NamespaceSandbox::InstallDefaultTerminationSignalHandlers();
+ CHECK(!NamespaceSandbox::InstallTerminationSignalHandler(
+ SIGUSR1, kSignalTerminationExitCode));
+
+ raise(SIGUSR1);  // Must still reach the handler installed at the top.
+ CHECK_EQ(1, signal_handler_called);
+}
+
+} // namespace
+
+} // namespace sandbox
diff --git a/sandbox/linux/services/namespace_utils.cc b/sandbox/linux/services/namespace_utils.cc
new file mode 100644
index 0000000000..29b649c078
--- /dev/null
+++ b/sandbox/linux/services/namespace_utils.cc
@@ -0,0 +1,117 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/namespace_utils.h"
+
+#include <fcntl.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "base/files/file_path.h"
+#include "base/files/file_util.h"
+#include "base/files/scoped_file.h"
+#include "base/logging.h"
+#include "base/posix/eintr_wrapper.h"
+#include "base/process/launch.h"
+#include "base/strings/safe_sprintf.h"
+#include "third_party/valgrind/valgrind.h"
+
+namespace sandbox {
+
+namespace {
+bool IsRunningOnValgrind() {
+  return RUNNING_ON_VALGRIND != 0;  // Non-zero when run under Valgrind.
+}
+
+const char kProcSelfSetgroups[] = "/proc/self/setgroups";
+} // namespace
+
+// static
+bool NamespaceUtils::WriteToIdMapFile(const char* map_file, generic_id_t id) {
+  // This function needs to be async-signal-safe, as it may be called in between
+  // fork and exec.
+  const int fd = HANDLE_EINTR(open(map_file, O_WRONLY));
+  if (fd == -1)
+    return false;
+
+  // Map |id| inside the namespace onto the same |id| outside of it.
+  char mapping[64];
+  const ssize_t len = base::strings::SafeSPrintf(mapping, "%d %d 1\n", id, id);
+
+  // Never hand a failed (negative) or truncated length to write(): a -1 from
+  // both SafeSPrintf() and write() would otherwise compare equal below and be
+  // reported as success.
+  const bool formatted = len > 0 && static_cast<size_t>(len) < sizeof(mapping);
+  const ssize_t rc = formatted ? HANDLE_EINTR(write(fd, mapping, len)) : -1;
+  RAW_CHECK(IGNORE_EINTR(close(fd)) == 0);
+  return formatted && rc == len;
+}
+
+// static
+bool NamespaceUtils::KernelSupportsUnprivilegedNamespace(int type) {
+  // Valgrind will let clone(2) pass-through, but doesn't support unshare(),
+  // so always consider namespaces unsupported there.
+  if (IsRunningOnValgrind())
+    return false;
+
+  // As of Linux 3.8, /proc/self/ns/* files exist for all namespace types. Since
+  // user namespaces were added in 3.8, it is OK to rely on the existence of
+  // /proc/self/ns/*.
+  //
+  // The user namespace is checked first, for every |type|: an unprivileged
+  // namespace of any other kind is only usable together with CLONE_NEWUSER.
+  if (!base::PathExists(base::FilePath("/proc/self/ns/user")))
+    return false;
+  if (type == CLONE_NEWUSER)
+    return true;
+
+  // Every other namespace type is supported iff its own /proc entry exists.
+  // The table pairs each clone flag with that entry.
+  static const struct {
+    int clone_flag;
+    const char* proc_path;
+  } kNamespaceFiles[] = {
+      {CLONE_NEWIPC, "/proc/self/ns/ipc"},
+      {CLONE_NEWNET, "/proc/self/ns/net"},
+      {CLONE_NEWNS, "/proc/self/ns/mnt"},
+      {CLONE_NEWPID, "/proc/self/ns/pid"},
+      {CLONE_NEWUTS, "/proc/self/ns/uts"},
+  };
+
+  for (const auto& entry : kNamespaceFiles) {
+    if (entry.clone_flag == type)
+      return base::PathExists(base::FilePath(entry.proc_path));
+  }
+
+  // Reaching this point means that |type| is not one of the namespace flags
+  // listed above, which is a caller error.
+  NOTREACHED();
+  return false;
+}
+
+// static. True iff the /proc/self/setgroups file exists.
+bool NamespaceUtils::KernelSupportsDenySetgroups() {
+ return base::PathExists(base::FilePath(kProcSelfSetgroups));
+}
+
+// static
+bool NamespaceUtils::DenySetgroups() {
+  // This function needs to be async-signal-safe.
+  const int fd = HANDLE_EINTR(open(kProcSelfSetgroups, O_WRONLY));
+  if (fd == -1)
+    return false;
+
+  // sizeof() counts the terminating NUL, which must not be written.
+  static const char kDenyCommand[] = "deny";
+  const ssize_t expected = sizeof(kDenyCommand) - 1;
+  const ssize_t wrote = HANDLE_EINTR(write(fd, kDenyCommand, expected));
+  RAW_CHECK(IGNORE_EINTR(close(fd)) == 0);
+  return wrote == expected;
+}
+
+} // namespace sandbox
diff --git a/sandbox/linux/services/namespace_utils.h b/sandbox/linux/services/namespace_utils.h
new file mode 100644
index 0000000000..f3c88a9452
--- /dev/null
+++ b/sandbox/linux/services/namespace_utils.h
@@ -0,0 +1,53 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SERVICES_NAMESPACE_UTILS_H_
+#define SANDBOX_LINUX_SERVICES_NAMESPACE_UTILS_H_
+
+#include <sys/types.h>
+
+#include "base/compiler_specific.h"
+#include "base/macros.h"
+#include "base/template_util.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+
+// Utility functions for using Linux namespaces. All members are static.
+class SANDBOX_EXPORT NamespaceUtils {
+ public:
+  // generic_id_t is only meaningful if uid_t and gid_t are the same type.
+  static_assert(base::is_same<uid_t, gid_t>::value,
+                "uid_t and gid_t must be the same type");
+  // generic_id_t can be used for either uid_t or gid_t.
+  typedef uid_t generic_id_t;
+
+  // Write a uid or gid mapping from |id| to |id| in |map_file|. This function
+  // is async-signal-safe.
+  static bool WriteToIdMapFile(const char* map_file,
+                               generic_id_t id) WARN_UNUSED_RESULT;
+
+  // Returns true if unprivileged namespaces of type |type| are supported
+  // (meaning that both CLONE_NEWUSER and |type| are supported). |type| must
+  // be one of CLONE_NEWIPC, CLONE_NEWNET, CLONE_NEWNS, CLONE_NEWPID,
+  // CLONE_NEWUSER, or CLONE_NEWUTS. This relies on access to /proc, so it will
+  // not work from within a sandbox.
+  static bool KernelSupportsUnprivilegedNamespace(int type);
+
+  // Returns true if the kernel supports denying setgroups in a user namespace.
+  // On kernels where this is supported, DenySetgroups must be called before a
+  // gid mapping can be added.
+  static bool KernelSupportsDenySetgroups();
+
+  // Disables setgroups() within the current user namespace. On Linux 3.18.2 and
+  // later, this is required in order to write to /proc/self/gid_map without
+  // having CAP_SETGID. Callers can determine whether this is needed with
+  // KernelSupportsDenySetgroups. This function is async-signal-safe.
+  static bool DenySetgroups() WARN_UNUSED_RESULT;
+
+ private:
+  DISALLOW_IMPLICIT_CONSTRUCTORS(NamespaceUtils);
+};
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SERVICES_NAMESPACE_UTILS_H_
diff --git a/sandbox/linux/services/namespace_utils_unittest.cc b/sandbox/linux/services/namespace_utils_unittest.cc
new file mode 100644
index 0000000000..41ed7e89a6
--- /dev/null
+++ b/sandbox/linux/services/namespace_utils_unittest.cc
@@ -0,0 +1,72 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/namespace_utils.h"
+
+#include <errno.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "base/logging.h"
+#include "base/posix/eintr_wrapper.h"
+#include "base/process/launch.h"
+#include "sandbox/linux/services/credentials.h"
+#include "sandbox/linux/tests/unit_tests.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace sandbox {
+
+namespace {
+
+// Checks that KernelSupportsUnprivilegedNamespace(CLONE_NEWUSER) is true
+// whenever a process can actually be created in a new user namespace.
+SANDBOX_TEST(NamespaceUtils, KernelSupportsUnprivilegedNamespace) {
+ const bool can_create_user_ns = Credentials::CanCreateProcessInNewUserNS();
+ const bool supports_user_ns =
+ NamespaceUtils::KernelSupportsUnprivilegedNamespace(CLONE_NEWUSER);
+ // can_create_user_ns implies supports_user_ns, but the converse is not
+ // necessarily true, as creating a user namespace can fail for various
+ // reasons.
+ if (can_create_user_ns) {
+ SANDBOX_ASSERT(supports_user_ns);
+ }
+}
+
+// Forks a child into a new user namespace and checks that writing the uid and
+// gid maps with WriteToIdMapFile makes getuid()/getgid() report the parent's
+// ids inside the child.
+SANDBOX_TEST(NamespaceUtils, WriteToIdMapFile) {
+ // Nothing to verify if a process cannot be created in a new user namespace.
+ if (!Credentials::CanCreateProcessInNewUserNS()) {
+ return;
+ }
+
+ // Ids of the parent, captured before forking.
+ const uid_t uid = getuid();
+ const gid_t gid = getgid();
+
+ // Queried in the parent, before the fork.
+ const bool supports_deny_setgroups =
+ NamespaceUtils::KernelSupportsDenySetgroups();
+
+ const pid_t pid =
+ base::ForkWithFlags(CLONE_NEWUSER | SIGCHLD, nullptr, nullptr);
+ ASSERT_NE(-1, pid);
+ if (pid == 0) {
+ // Child: setgroups is denied first, before the gid map is written below.
+ if (supports_deny_setgroups) {
+ RAW_CHECK(NamespaceUtils::DenySetgroups());
+ }
+
+ // Before the mapping is written the child must not see the parent's uid.
+ RAW_CHECK(getuid() != uid);
+ RAW_CHECK(NamespaceUtils::WriteToIdMapFile("/proc/self/uid_map", uid));
+ RAW_CHECK(getuid() == uid);
+
+ // Same check for the gid.
+ RAW_CHECK(getgid() != gid);
+ RAW_CHECK(NamespaceUtils::WriteToIdMapFile("/proc/self/gid_map", gid));
+ RAW_CHECK(getgid() == gid);
+
+ _exit(0);
+ }
+
+ // Parent: the child must be reaped with a zero wait status. |status| starts
+ // out non-zero so an unwritten value cannot pass the check.
+ int status = 42;
+ SANDBOX_ASSERT_EQ(pid, HANDLE_EINTR(waitpid(pid, &status, 0)));
+ SANDBOX_ASSERT_EQ(0, status);
+}
+
+} // namespace.
+
+} // namespace sandbox.
diff --git a/sandbox/linux/services/proc_util.cc b/sandbox/linux/services/proc_util.cc
new file mode 100644
index 0000000000..d3f755f9a1
--- /dev/null
+++ b/sandbox/linux/services/proc_util.cc
@@ -0,0 +1,119 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/proc_util.h"
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "base/logging.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/posix/eintr_wrapper.h"
+#include "base/strings/string_number_conversions.h"
+
+namespace sandbox {
+namespace {
+
+// Deleter for ScopedDIR: closes the directory stream with closedir() and
+// crashes if that fails.
+struct DIRCloser {
+ void operator()(DIR* d) const {
+ DCHECK(d);
+ PCHECK(0 == closedir(d));
+ }
+};
+
+// Owning handle for a DIR* that is released through DIRCloser.
+typedef scoped_ptr<DIR, DIRCloser> ScopedDIR;
+
+// Opens the directory at |path| read-only with O_CLOEXEC and returns the
+// owning descriptor. Crashes if the directory cannot be opened.
+base::ScopedFD OpenDirectory(const char* path) {
+  DCHECK(path);
+  const int kDirectoryOpenFlags = O_RDONLY | O_DIRECTORY | O_CLOEXEC;
+  const int raw_fd = HANDLE_EINTR(open(path, kDirectoryOpenFlags));
+  base::ScopedFD directory_fd(raw_fd);
+  PCHECK(directory_fd.is_valid());
+  return directory_fd.Pass();
+}
+
+} // namespace
+
+// Counts the entries of "self/fd/" (resolved relative to |proc_fd|), leaving
+// out ".", "..", |proc_fd| itself and the descriptor opened here to perform
+// the enumeration.
+int ProcUtil::CountOpenFds(int proc_fd) {
+  DCHECK_LE(0, proc_fd);
+  const int kOpenFlags = O_DIRECTORY | O_RDONLY | O_CLOEXEC;
+  int proc_self_fd = HANDLE_EINTR(openat(proc_fd, "self/fd/", kOpenFlags));
+  PCHECK(0 <= proc_self_fd);
+
+  // From here on |dir| owns proc_self_fd: it must only be closed or modified
+  // through |dir|.
+  ScopedDIR dir(fdopendir(proc_self_fd));
+  CHECK(dir);
+
+  int open_fd_count = 0;
+  struct dirent e;
+  struct dirent* next_entry;
+  while (readdir_r(dir.get(), &e, &next_entry) == 0 && next_entry) {
+    const bool is_dot_entry =
+        strcmp(e.d_name, ".") == 0 || strcmp(e.d_name, "..") == 0;
+    if (is_dot_entry)
+      continue;
+
+    int fd_num;
+    CHECK(base::StringToInt(e.d_name, &fd_num));
+    const bool is_excluded_fd = fd_num == proc_fd || fd_num == proc_self_fd;
+    if (!is_excluded_fd)
+      ++open_fd_count;
+  }
+  return open_fd_count;
+}
+
+// Returns true if any file descriptor of the current process refers to a
+// directory, ignoring |proc_fd| and the descriptor opened here to enumerate
+// "self/fd/". |proc_fd| must be an open descriptor for /proc/ and stays owned
+// by the caller. Crashes if the fd directory cannot be opened or read.
+bool ProcUtil::HasOpenDirectory(int proc_fd) {
+  DCHECK_LE(0, proc_fd);
+  // Retry on EINTR, exactly as CountOpenFds() does for the same call.
+  int proc_self_fd = HANDLE_EINTR(
+      openat(proc_fd, "self/fd/", O_DIRECTORY | O_RDONLY | O_CLOEXEC));
+  PCHECK(0 <= proc_self_fd);
+
+  // Ownership of proc_self_fd is transferred here, it must not be closed
+  // or modified afterwards except via dir.
+  ScopedDIR dir(fdopendir(proc_self_fd));
+  CHECK(dir);
+
+  struct dirent e;
+  struct dirent* de;
+  while (!readdir_r(dir.get(), &e, &de) && de) {
+    if (strcmp(e.d_name, ".") == 0 || strcmp(e.d_name, "..") == 0) {
+      continue;
+    }
+
+    // Every remaining entry name is expected to be a decimal fd number.
+    int fd_num;
+    CHECK(base::StringToInt(e.d_name, &fd_num));
+    if (fd_num == proc_fd || fd_num == proc_self_fd) {
+      continue;
+    }
+
+    struct stat s;
+    // It's OK to use proc_self_fd here, fstatat won't modify it.
+    CHECK(fstatat(proc_self_fd, e.d_name, &s, 0) == 0);
+    if (S_ISDIR(s.st_mode)) {
+      return true;
+    }
+  }
+
+  // No open unmanaged directories found.
+  return false;
+}
+
+// Convenience overload: opens /proc/ itself, runs the check above while
+// ignoring that descriptor, and closes it again on return.
+bool ProcUtil::HasOpenDirectory() {
+  // OpenDirectory() PCHECKs the descriptor, so a failure to open /proc/ is
+  // reported at the failing open() with its own errno instead of surfacing
+  // later, inside the overload, as an invalid |proc_fd|.
+  base::ScopedFD proc_fd(OpenDirectory("/proc/"));
+  return HasOpenDirectory(proc_fd.get());
+}
+
+// static
+// Returns an owning descriptor for /proc/; OpenDirectory() crashes on failure.
+base::ScopedFD ProcUtil::OpenProc() {
+  static const char kProcDirectory[] = "/proc/";
+  return OpenDirectory(kProcDirectory);
+}
+
+} // namespace sandbox
diff --git a/sandbox/linux/services/proc_util.h b/sandbox/linux/services/proc_util.h
new file mode 100644
index 0000000000..bc14c5ef2a
--- /dev/null
+++ b/sandbox/linux/services/proc_util.h
@@ -0,0 +1,42 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SERVICES_PROC_UTIL_H_
+#define SANDBOX_LINUX_SERVICES_PROC_UTIL_H_
+
+#include "base/files/scoped_file.h"
+#include "base/macros.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+
+// Helpers that inspect the current process's file descriptor table through
+// /proc/. All members are static.
+class SANDBOX_EXPORT ProcUtil {
+ public:
+ // Returns the number of file descriptors in the current process's FD
+ // table, excluding |proc_fd|, which should be a file descriptor for
+ // /proc/.
+ static int CountOpenFds(int proc_fd);
+
+ // Checks whether the current process has any directory file descriptor open.
+ // Directory file descriptors are "capabilities" that would let a process use
+ // system calls such as openat() to bypass restrictions such as
+ // DropFileSystemAccess().
+ // Sometimes it's useful to call HasOpenDirectory() after file system access
+ // has been dropped. In this case, |proc_fd| should be a file descriptor to
+ // /proc/. The file descriptor in |proc_fd| will be ignored by
+ // HasOpenDirectory() and remains owned by the caller. It is very important
+ // for the caller to close it.
+ static bool HasOpenDirectory(int proc_fd) WARN_UNUSED_RESULT;
+ // Overload that opens /proc/ itself for the duration of the check.
+ static bool HasOpenDirectory() WARN_UNUSED_RESULT;
+
+ // Open /proc/ or crash if not possible.
+ static base::ScopedFD OpenProc();
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ProcUtil);
+};
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SERVICES_PROC_UTIL_H_
diff --git a/sandbox/linux/services/proc_util_unittest.cc b/sandbox/linux/services/proc_util_unittest.cc
new file mode 100644
index 0000000000..bf25151956
--- /dev/null
+++ b/sandbox/linux/services/proc_util_unittest.cc
@@ -0,0 +1,62 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/proc_util.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "base/files/scoped_file.h"
+#include "base/posix/eintr_wrapper.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace sandbox {
+
+// Checks that CountOpenFds() goes up by one when a descriptor is opened and
+// returns to the baseline once it is closed.
+TEST(ProcUtil, CountOpenFds) {
+ base::ScopedFD proc_fd(open("/proc/", O_RDONLY | O_DIRECTORY));
+ ASSERT_TRUE(proc_fd.is_valid());
+ // Baseline, taken before the extra descriptor exists.
+ int fd_count = ProcUtil::CountOpenFds(proc_fd.get());
+ int fd = open("/dev/null", O_RDONLY);
+ ASSERT_LE(0, fd);
+ EXPECT_EQ(fd_count + 1, ProcUtil::CountOpenFds(proc_fd.get()));
+ ASSERT_EQ(0, IGNORE_EINTR(close(fd)));
+ EXPECT_EQ(fd_count, ProcUtil::CountOpenFds(proc_fd.get()));
+}
+
+// Checks that the argument-less HasOpenDirectory() reports a directory
+// descriptor only while one is held open.
+TEST(ProcUtil, HasOpenDirectory) {
+ // No open directory should exist at startup.
+ EXPECT_FALSE(ProcUtil::HasOpenDirectory());
+ {
+ // Have a "/proc" file descriptor around.
+ int proc_fd = open("/proc/", O_RDONLY | O_DIRECTORY);
+ base::ScopedFD proc_fd_closer(proc_fd);
+ EXPECT_TRUE(ProcUtil::HasOpenDirectory());
+ }
+ // The descriptor was closed when |proc_fd_closer| went out of scope.
+ EXPECT_FALSE(ProcUtil::HasOpenDirectory());
+}
+
+// Checks that HasOpenDirectory(proc_fd) ignores |proc_fd| itself but still
+// detects any other open directory descriptor.
+TEST(ProcUtil, HasOpenDirectoryWithFD) {
+ int proc_fd = open("/proc/", O_RDONLY | O_DIRECTORY);
+ base::ScopedFD proc_fd_closer(proc_fd);
+ ASSERT_LE(0, proc_fd);
+
+ // Don't pass |proc_fd|, an open directory (proc_fd) should
+ // be detected.
+ EXPECT_TRUE(ProcUtil::HasOpenDirectory());
+ // Pass |proc_fd| and no open directory should be detected.
+ EXPECT_FALSE(ProcUtil::HasOpenDirectory(proc_fd));
+
+ {
+ // Have a directory file descriptor around.
+ int open_directory_fd = open("/proc/self/", O_RDONLY | O_DIRECTORY);
+ base::ScopedFD open_directory_fd_closer(open_directory_fd);
+ EXPECT_TRUE(ProcUtil::HasOpenDirectory(proc_fd));
+ }
+
+ // The "/proc/self/" file descriptor should now be closed, |proc_fd| is the
+ // only directory file descriptor open.
+ EXPECT_FALSE(ProcUtil::HasOpenDirectory(proc_fd));
+}
+
+} // namespace sandbox
diff --git a/sandbox/linux/services/resource_limits.cc b/sandbox/linux/services/resource_limits.cc
new file mode 100644
index 0000000000..1ec11295d1
--- /dev/null
+++ b/sandbox/linux/services/resource_limits.cc
@@ -0,0 +1,26 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/resource_limits.h"
+
+#include <sys/resource.h>
+#include <sys/time.h>
+
+#include <algorithm>
+
+namespace sandbox {
+
+// static
+// Clamps the soft and hard limit of |resource| to at most |limit|. Returns
+// false if either getrlimit() or setrlimit() fails.
+bool ResourceLimits::Lower(int resource, rlim_t limit) {
+  struct rlimit current_limits;
+  if (getrlimit(resource, &current_limits) != 0)
+    return false;
+
+  // Taking the minimum guarantees an existing, lower limit is never raised.
+  const rlim_t lowered_soft = std::min(current_limits.rlim_cur, limit);
+  const rlim_t lowered_hard = std::min(current_limits.rlim_max, limit);
+  const struct rlimit lowered_limits = {lowered_soft, lowered_hard};
+  return setrlimit(resource, &lowered_limits) == 0;
+}
+
+} // namespace sandbox
diff --git a/sandbox/linux/services/resource_limits.h b/sandbox/linux/services/resource_limits.h
new file mode 100644
index 0000000000..3464dab679
--- /dev/null
+++ b/sandbox/linux/services/resource_limits.h
@@ -0,0 +1,29 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SERVICES_RESOURCE_LIMITS_H_
+#define SANDBOX_LINUX_SERVICES_RESOURCE_LIMITS_H_
+
+#include <sys/resource.h>
+
+#include "base/compiler_specific.h"
+#include "base/macros.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+
+// This class provides a small wrapper around setrlimit(). Its only member is
+// static.
+class SANDBOX_EXPORT ResourceLimits {
+ public:
+ // Lower the soft and hard limit of |resource| to |limit|. If the current
+ // limit is lower than |limit|, keep it.
+ // Returns true on success and false if the limits could not be read or set.
+ static bool Lower(int resource, rlim_t limit) WARN_UNUSED_RESULT;
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ResourceLimits);
+};
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SERVICES_RESOURCE_LIMITS_H_
diff --git a/sandbox/linux/services/resource_limits_unittests.cc b/sandbox/linux/services/resource_limits_unittests.cc
new file mode 100644
index 0000000000..910c740f7b
--- /dev/null
+++ b/sandbox/linux/services/resource_limits_unittests.cc
@@ -0,0 +1,43 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/resource_limits.h"
+
+#include <errno.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "base/logging.h"
+#include "sandbox/linux/tests/test_utils.h"
+#include "sandbox/linux/tests/unit_tests.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace sandbox {
+
+namespace {
+
+// Fails on Android: crbug.com/459158
+#if !defined(OS_ANDROID)
+#define MAYBE_NoFork DISABLE_ON_ASAN(NoFork)
+#else
+#define MAYBE_NoFork DISABLED_NoFork
+#endif  // OS_ANDROID
+
+// Checks that once RLIMIT_NPROC has been lowered to 0, fork() fails with
+// EAGAIN.
+// Not being able to fork breaks LeakSanitizer, so disable on
+// all ASAN builds.
+SANDBOX_TEST(ResourceLimits, MAYBE_NoFork) {
+  // Make sure that fork will fail with EAGAIN.
+  SANDBOX_ASSERT(ResourceLimits::Lower(RLIMIT_NPROC, 0));
+  errno = 0;
+  const pid_t pid = fork();
+  // Save errno immediately: the calls made below, before the final check,
+  // are free to overwrite it.
+  const int fork_errno = errno;
+  // Reap any child if fork succeeded.
+  TestUtils::HandlePostForkReturn(pid);
+  SANDBOX_ASSERT_EQ(-1, pid);
+  CHECK_EQ(EAGAIN, fork_errno);
+}
+
+} // namespace
+
+} // namespace sandbox
diff --git a/sandbox/linux/services/scoped_process.cc b/sandbox/linux/services/scoped_process.cc
new file mode 100644
index 0000000000..65af4873a4
--- /dev/null
+++ b/sandbox/linux/services/scoped_process.cc
@@ -0,0 +1,119 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/scoped_process.h"
+
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "base/callback.h"
+#include "base/logging.h"
+#include "base/posix/eintr_wrapper.h"
+#include "build/build_config.h"
+#include "sandbox/linux/services/syscall_wrappers.h"
+#include "sandbox/linux/services/thread_helpers.h"
+
+namespace sandbox {
+
+namespace {
+
+// Payload the child writes to the pipe once its closure has returned; only
+// the first character is sent.
+const char kSynchronisationChar[] = "D";
+
+// Blocks the calling process indefinitely; pause() is re-entered every time
+// it returns.
+void WaitForever() {
+  for (;;) {
+    pause();
+  }
+}
+
+} // namespace
+
+// Creates the synchronisation pipe and forks. The child runs |child_callback|,
+// writes one byte to the pipe and then blocks forever; the parent keeps only
+// the read end of the pipe.
+ScopedProcess::ScopedProcess(const base::Closure& child_callback)
+ : child_process_id_(-1), process_id_(getpid()) {
+ PCHECK(0 == pipe(pipe_fds_));
+#if !defined(THREAD_SANITIZER)
+ // Make sure that we can safely fork().
+ CHECK(ThreadHelpers::IsSingleThreaded());
+#endif
+ child_process_id_ = fork();
+ PCHECK(0 <= child_process_id_);
+
+ if (0 == child_process_id_) {
+ // Child: the read end is not needed here.
+ PCHECK(0 == IGNORE_EINTR(close(pipe_fds_[0])));
+ pipe_fds_[0] = -1;
+ child_callback.Run();
+ // Notify the parent that the closure has run.
+ CHECK_EQ(1, HANDLE_EINTR(write(pipe_fds_[1], kSynchronisationChar, 1)));
+ WaitForever();
+ NOTREACHED();
+ _exit(1);
+ }
+
+ // Parent: drop the write end so that only the child holds it.
+ PCHECK(0 == IGNORE_EINTR(close(pipe_fds_[1])));
+ pipe_fds_[1] = -1;
+}
+
+// Kills the child with SIGKILL, reaps it, and closes whichever pipe ends are
+// still open. Must run in the process that created the object.
+ScopedProcess::~ScopedProcess() {
+ CHECK(IsOriginalProcess());
+ if (child_process_id_ >= 0) {
+ PCHECK(0 == kill(child_process_id_, SIGKILL));
+ siginfo_t process_info;
+
+ // Without WNOWAIT, this wait reaps the child.
+ PCHECK(0 == HANDLE_EINTR(
+ waitid(P_PID, child_process_id_, &process_info, WEXITED)));
+ }
+ // A pipe end set to -1 has already been closed by the constructor.
+ if (pipe_fds_[0] >= 0) {
+ PCHECK(0 == IGNORE_EINTR(close(pipe_fds_[0])));
+ }
+ if (pipe_fds_[1] >= 0) {
+ PCHECK(0 == IGNORE_EINTR(close(pipe_fds_[1])));
+ }
+}
+
+// Blocks until the child has terminated and returns its si_status.
+// |*got_signaled| is set to false for a normal exit (the result is an exit
+// code) and to true when the child was killed or dumped core (the result is
+// a signal number). The child is left unreaped.
+int ScopedProcess::WaitForExit(bool* got_signaled) {
+  DCHECK(got_signaled);
+  CHECK(IsOriginalProcess());
+
+  // WNOWAIT keeps the child waitable so the destructor can still reap it.
+  siginfo_t process_info;
+  int ret = HANDLE_EINTR(
+      waitid(P_PID, child_process_id_, &process_info, WEXITED | WNOWAIT));
+  PCHECK(0 == ret) << "Did something else wait on the child?";
+
+  switch (process_info.si_code) {
+    case CLD_EXITED:
+      *got_signaled = false;
+      break;
+    case CLD_KILLED:
+    case CLD_DUMPED:
+      *got_signaled = true;
+      break;
+    default:
+      CHECK(false) << "ScopedProcess needs to be extended for si_code "
+                   << process_info.si_code;
+  }
+  return process_info.si_status;
+}
+
+// Blocks reading one byte from the synchronisation pipe. Returns true if the
+// child sent its notification byte and false on end-of-file, i.e. when no
+// write end of the pipe remains open.
+bool ScopedProcess::WaitForClosureToRun() {
+ char c = 0;
+ int ret = HANDLE_EINTR(read(pipe_fds_[0], &c, 1));
+ PCHECK(ret >= 0);
+ // A zero-length read means EOF: the byte was never written.
+ if (0 == ret)
+ return false;
+
+ CHECK_EQ(c, kSynchronisationChar[0]);
+ return true;
+}
+
+// Returns true when called from the process that constructed this object.
+// Another process using the object after a fork() would be problematic; this
+// method lets callers assert that it is not happening.
+bool ScopedProcess::IsOriginalProcess() {
+  // sys_getpid() issues the system call directly, bypassing glibc's caching
+  // of PIDs.
+  return sys_getpid() == process_id_;
+}
+
+} // namespace sandbox
diff --git a/sandbox/linux/services/scoped_process.h b/sandbox/linux/services/scoped_process.h
new file mode 100644
index 0000000000..bddbd5529b
--- /dev/null
+++ b/sandbox/linux/services/scoped_process.h
@@ -0,0 +1,55 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SERVICES_SCOPED_PROCESS_H_
+#define SANDBOX_LINUX_SERVICES_SCOPED_PROCESS_H_
+
+#include "base/callback_forward.h"
+#include "base/macros.h"
+#include "base/process/process_handle.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+
+// fork() a child process that will run a Closure.
+// After the Closure has run, the child will pause forever. If this object
+// is destroyed, the child will be destroyed, even if the closure did not
+// finish running. It's ok to signal the child from outside of this class to
+// destroy it.
+// This class cannot be instantiated from a multi-threaded process, as it needs
+// to fork().
+class SANDBOX_EXPORT ScopedProcess {
+ public:
+ // A new process will be created and |child_callback| will run in the child
+ // process. This callback is allowed to terminate the process or to simply
+ // return. If the callback returns, the process will wait forever.
+ explicit ScopedProcess(const base::Closure& child_callback);
+ ~ScopedProcess();
+
+ // Wait for the process to exit.
+ // |got_signaled| tells how to interpret the return value: either as an exit
+ // code, or as a signal number.
+ // When this returns, the process will still not have been reaped and will
+ // survive as a zombie for the lifetime of this object. This method can be
+ // called multiple times.
+ int WaitForExit(bool* got_signaled);
+
+ // Wait for the |child_callback| passed at construction to run. Return false
+ // if |child_callback| did not finish running and we know it never will (for
+ // instance the child crashed or used _exit()).
+ bool WaitForClosureToRun();
+ // Returns the process id of the child.
+ base::ProcessId GetPid() { return child_process_id_; }
+
+ private:
+ // Returns true when called from the process that created this object.
+ bool IsOriginalProcess();
+
+ // Pid of the forked child, as returned by fork().
+ base::ProcessId child_process_id_;
+ // Pid of the process that constructed this object.
+ base::ProcessId process_id_;
+ // Pipe the child uses to signal that the closure has run.
+ int pipe_fds_[2];
+ DISALLOW_COPY_AND_ASSIGN(ScopedProcess);
+};
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SERVICES_SCOPED_PROCESS_H_
diff --git a/sandbox/linux/services/scoped_process_unittest.cc b/sandbox/linux/services/scoped_process_unittest.cc
new file mode 100644
index 0000000000..8bd2847997
--- /dev/null
+++ b/sandbox/linux/services/scoped_process_unittest.cc
@@ -0,0 +1,130 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/scoped_process.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "base/bind.h"
+#include "base/callback.h"
+#include "base/files/file_util.h"
+#include "base/files/scoped_file.h"
+#include "base/logging.h"
+#include "base/posix/eintr_wrapper.h"
+#include "base/threading/platform_thread.h"
+#include "base/time/time.h"
+#include "sandbox/linux/tests/unit_tests.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace sandbox {
+
+namespace {
+
+// Terminates the calling process immediately with exit code 0.
+void DoExit() {
+  _exit(0);
+}
+
+// Terminates the calling process immediately with |exit_code|.
+void ExitWithCode(int exit_code) {
+  _exit(exit_code);
+}
+
+// Raises |signal| on the calling process; exits with code 0 if the process
+// survives the signal.
+void RaiseAndExit(int signal) {
+  PCHECK(0 == raise(signal));
+  _exit(0);
+}
+
+// Closure body that returns right away.
+void DoNothing() {
+}
+
+// Checks that WaitForExit() reports a normal exit together with the child's
+// exit code, and that it can be called more than once.
+TEST(ScopedProcess, ScopedProcessNormalExit) {
+ const int kCustomExitCode = 12;
+ ScopedProcess process(base::Bind(&ExitWithCode, kCustomExitCode));
+ // Initialised to the opposite of the expected value so the check below
+ // proves WaitForExit() wrote it.
+ bool got_signaled = true;
+ int exit_code = process.WaitForExit(&got_signaled);
+ EXPECT_FALSE(got_signaled);
+ EXPECT_EQ(kCustomExitCode, exit_code);
+
+ // Verify that WaitForExit() can be called multiple times on the same
+ // process.
+ bool got_signaled2 = true;
+ int exit_code2 = process.WaitForExit(&got_signaled2);
+ EXPECT_FALSE(got_signaled2);
+ EXPECT_EQ(kCustomExitCode, exit_code2);
+}
+
+// Checks that a child raising SIGABRT is reported as signaled, with SIGABRT
+// as the returned value.
+// Disable this test on Android, SIGABRT is funky there.
+TEST(ScopedProcess, DISABLE_ON_ANDROID(ScopedProcessAbort)) {
+ // Restore the default SIGABRT disposition before forking.
+ PCHECK(SIG_ERR != signal(SIGABRT, SIG_DFL));
+ ScopedProcess process(base::Bind(&RaiseAndExit, SIGABRT));
+ bool got_signaled = false;
+ int exit_code = process.WaitForExit(&got_signaled);
+ EXPECT_TRUE(got_signaled);
+ EXPECT_EQ(SIGABRT, exit_code);
+}
+
+// Checks that a child killed from outside the class with SIGKILL is reported
+// as signaled, with SIGKILL as the returned value.
+TEST(ScopedProcess, ScopedProcessSignaled) {
+ ScopedProcess process(base::Bind(&DoNothing));
+ bool got_signaled = false;
+ ASSERT_EQ(0, kill(process.GetPid(), SIGKILL));
+ int exit_code = process.WaitForExit(&got_signaled);
+ EXPECT_TRUE(got_signaled);
+ EXPECT_EQ(SIGKILL, exit_code);
+}
+
+// Checks that the child is really gone once the ScopedProcess object has been
+// destroyed, using a pipe whose write end the child inherited.
+TEST(ScopedProcess, DiesForReal) {
+ int pipe_fds[2];
+ ASSERT_EQ(0, pipe(pipe_fds));
+ base::ScopedFD read_end_closer(pipe_fds[0]);
+ base::ScopedFD write_end_closer(pipe_fds[1]);
+
+ // The process is created and destroyed within this scope.
+ { ScopedProcess process(base::Bind(&DoExit)); }
+
+ // Close writing end of the pipe.
+ write_end_closer.reset();
+ pipe_fds[1] = -1;
+
+ ASSERT_EQ(0, fcntl(pipe_fds[0], F_SETFL, O_NONBLOCK));
+ char c;
+ // If the child process is dead for real, there will be no writing end
+ // for this pipe left and read will EOF instead of returning EWOULDBLOCK.
+ ASSERT_EQ(0, read(pipe_fds[0], &c, 1));
+}
+
+// Checks WaitForClosureToRun() for a closure that returns (true) and for one
+// that exits the process before returning (false).
+TEST(ScopedProcess, SynchronizationBasic) {
+ ScopedProcess process1(base::Bind(&DoNothing));
+ EXPECT_TRUE(process1.WaitForClosureToRun());
+
+ ScopedProcess process2(base::Bind(&DoExit));
+ // The closure didn't finish running normally. This case is simple enough
+ // that process.WaitForClosureToRun() should return false, even though the
+ // API does not guarantee that it will return at all.
+ EXPECT_FALSE(process2.WaitForClosureToRun());
+}
+
+// Sleeps for |time_to_sleep| milliseconds, then writes the single byte '1' to
+// |fd|. Crashes if the byte cannot be written.
+void SleepInMsAndWriteOneByte(int time_to_sleep, int fd) {
+  base::PlatformThread::Sleep(base::TimeDelta::FromMilliseconds(time_to_sleep));
+  // Retry if the write is interrupted by a signal before any data is written.
+  CHECK(1 == HANDLE_EINTR(write(fd, "1", 1)));
+}
+
+// Checks that WaitForClosureToRun() returns only after a slow closure has
+// completed: the byte the closure writes must already be readable.
+TEST(ScopedProcess, SynchronizationWorks) {
+ int pipe_fds[2];
+ ASSERT_EQ(0, pipe(pipe_fds));
+ base::ScopedFD read_end_closer(pipe_fds[0]);
+ base::ScopedFD write_end_closer(pipe_fds[1]);
+
+ // Start a process with a closure that takes a little bit to run.
+ ScopedProcess process(
+ base::Bind(&SleepInMsAndWriteOneByte, 100, pipe_fds[1]));
+ EXPECT_TRUE(process.WaitForClosureToRun());
+
+ // Verify that the closure did, indeed, run.
+ // The read is non-blocking, so it fails instead of waiting if the byte is
+ // not there yet.
+ ASSERT_EQ(0, fcntl(pipe_fds[0], F_SETFL, O_NONBLOCK));
+ char c = 0;
+ EXPECT_EQ(1, read(pipe_fds[0], &c, 1));
+ EXPECT_EQ('1', c);
+}
+
+} // namespace
+
+} // namespace sandbox
diff --git a/sandbox/linux/services/syscall_wrappers.cc b/sandbox/linux/services/syscall_wrappers.cc
new file mode 100644
index 0000000000..b6e87655a3
--- /dev/null
+++ b/sandbox/linux/services/syscall_wrappers.cc
@@ -0,0 +1,246 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/syscall_wrappers.h"
+
+#include <pthread.h>
+#include <sched.h>
+#include <setjmp.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <cstring>
+
+#include "base/compiler_specific.h"
+#include "base/logging.h"
+#include "build/build_config.h"
+#include "sandbox/linux/system_headers/capability.h"
+#include "sandbox/linux/system_headers/linux_signal.h"
+#include "sandbox/linux/system_headers/linux_syscalls.h"
+#include "third_party/valgrind/valgrind.h"
+
+namespace sandbox {
+
+// Issues the getpid system call directly instead of going through libc.
+pid_t sys_getpid(void) {
+  const pid_t pid = syscall(__NR_getpid);
+  return pid;
+}
+
+// Issues the gettid system call directly and returns its result.
+pid_t sys_gettid(void) {
+  const pid_t tid = syscall(__NR_gettid);
+  return tid;
+}
+
+// Direct clone system call. |child_stack| and |tls| can only be nullptr.
+// Aborts via RAW_LOG(FATAL) when |flags| contains CLONE_SETTLS or CLONE_VM, or
+// requests a tid to be stored through a null |ptid| / |ctid|.
+long sys_clone(unsigned long flags,
+ decltype(nullptr) child_stack,
+ pid_t* ptid,
+ pid_t* ctid,
+ decltype(nullptr) tls) {
+ const bool clone_tls_used = flags & CLONE_SETTLS;
+ const bool invalid_ctid =
+ (flags & (CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)) && !ctid;
+ const bool invalid_ptid = (flags & CLONE_PARENT_SETTID) && !ptid;
+
+ // We do not support CLONE_VM.
+ const bool clone_vm_used = flags & CLONE_VM;
+ if (clone_tls_used || invalid_ctid || invalid_ptid || clone_vm_used) {
+ RAW_LOG(FATAL, "Invalid usage of sys_clone");
+ }
+
+ if (ptid) MSAN_UNPOISON(ptid, sizeof(*ptid));
+ if (ctid) MSAN_UNPOISON(ctid, sizeof(*ctid));
+ // See kernel/fork.c in Linux. There is different ordering of sys_clone
+ // parameters depending on CONFIG_CLONE_BACKWARDS* configuration options.
+ // NOTE(review): when neither preprocessor branch below matches, the function
+ // has no return statement.
+#if defined(ARCH_CPU_X86_64)
+ return syscall(__NR_clone, flags, child_stack, ptid, ctid, tls);
+#elif defined(ARCH_CPU_X86) || defined(ARCH_CPU_ARM_FAMILY) || \
+ defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_MIPS64_FAMILY)
+ // CONFIG_CLONE_BACKWARDS defined.
+ return syscall(__NR_clone, flags, child_stack, ptid, tls, ctid);
+#endif
+}
+
+// Shorthand for the five-argument sys_clone() with every pointer argument
+// null.
+long sys_clone(unsigned long flags) {
+  const long result = sys_clone(flags, nullptr, nullptr, nullptr, nullptr);
+  return result;
+}
+
+// Issues the exit_group system call directly with |status|; the syscall()
+// result is discarded.
+void sys_exit_group(int status) {
+  static_cast<void>(syscall(__NR_exit_group, status));
+}
+
+// Issues the seccomp system call directly and returns its result.
+int sys_seccomp(unsigned int operation,
+                unsigned int flags,
+                const struct sock_fprog* args) {
+  const int result = syscall(__NR_seccomp, operation, flags, args);
+  return result;
+}
+
+// Issues the prlimit64 system call directly and returns its result. On
+// success, a non-null |old_limit| is marked as initialised for
+// MemorySanitizer.
+int sys_prlimit64(pid_t pid,
+                  int resource,
+                  const struct rlimit64* new_limit,
+                  struct rlimit64* old_limit) {
+  const int result =
+      syscall(__NR_prlimit64, pid, resource, new_limit, old_limit);
+  if (result != 0)
+    return result;
+
+  if (old_limit) MSAN_UNPOISON(old_limit, sizeof(*old_limit));
+  return result;
+}
+
+// Issues the capget system call directly and returns its result. On success,
+// the non-null output structures are marked as initialised for
+// MemorySanitizer.
+int sys_capget(cap_hdr* hdrp, cap_data* datap) {
+  const int result = syscall(__NR_capget, hdrp, datap);
+  if (result != 0)
+    return result;
+
+  if (hdrp) MSAN_UNPOISON(hdrp, sizeof(*hdrp));
+  if (datap) MSAN_UNPOISON(datap, sizeof(*datap));
+  return result;
+}
+
+// Issues the capset system call directly and returns its result.
+int sys_capset(cap_hdr* hdrp, const cap_data* datap) {
+  const int result = syscall(__NR_capset, hdrp, datap);
+  return result;
+}
+
+// Issues the getresuid system call directly and returns its result. On
+// success, the non-null outputs are marked as initialised for
+// MemorySanitizer.
+int sys_getresuid(uid_t* ruid, uid_t* euid, uid_t* suid) {
+#if defined(ARCH_CPU_X86) || defined(ARCH_CPU_ARMEL)
+  // On 32-bit x86 or 32-bit arm, getresuid supports 16bit values only.
+  // Use getresuid32 instead.
+  const int result = syscall(__NR_getresuid32, ruid, euid, suid);
+#else
+  const int result = syscall(__NR_getresuid, ruid, euid, suid);
+#endif
+  if (result != 0)
+    return result;
+
+  if (ruid) MSAN_UNPOISON(ruid, sizeof(*ruid));
+  if (euid) MSAN_UNPOISON(euid, sizeof(*euid));
+  if (suid) MSAN_UNPOISON(suid, sizeof(*suid));
+  return result;
+}
+
+// Issues the getresgid system call directly and returns its result. On
+// success, the non-null outputs are marked as initialised for
+// MemorySanitizer.
+int sys_getresgid(gid_t* rgid, gid_t* egid, gid_t* sgid) {
+#if defined(ARCH_CPU_X86) || defined(ARCH_CPU_ARMEL)
+  // On 32-bit x86 or 32-bit arm, getresgid supports 16bit values only.
+  // Use getresgid32 instead.
+  const int result = syscall(__NR_getresgid32, rgid, egid, sgid);
+#else
+  const int result = syscall(__NR_getresgid, rgid, egid, sgid);
+#endif
+  if (result != 0)
+    return result;
+
+  if (rgid) MSAN_UNPOISON(rgid, sizeof(*rgid));
+  if (egid) MSAN_UNPOISON(egid, sizeof(*egid));
+  if (sgid) MSAN_UNPOISON(sgid, sizeof(*sgid));
+  return result;
+}
+
+// Issues the chroot system call directly and returns its result.
+int sys_chroot(const char* path) {
+  const int result = syscall(__NR_chroot, path);
+  return result;
+}
+
+// Issues the unshare system call directly and returns its result.
+int sys_unshare(int flags) {
+  const int result = syscall(__NR_unshare, flags);
+  return result;
+}
+
+// Changes the signal mask through a direct rt_sigprocmask system call.
+// |oldset| can only be nullptr: the previous mask is never retrieved.
+// |set| is read unconditionally, so it must point to a valid sigset_t.
+int sys_sigprocmask(int how, const sigset_t* set, decltype(nullptr) oldset) {
+ // In some toolchain (in particular Android and PNaCl toolchain),
+ // sigset_t is 32 bits, but Linux ABI requires 64 bits.
+ uint64_t linux_value = 0;
+ // Copy at most 64 bits; any bits not covered by sigset_t stay zero.
+ std::memcpy(&linux_value, set, std::min(sizeof(sigset_t), sizeof(uint64_t)));
+ return syscall(__NR_rt_sigprocmask, how, &linux_value, nullptr,
+ sizeof(linux_value));
+}
+
+#if (defined(MEMORY_SANITIZER) || defined(THREAD_SANITIZER) || \
+ (defined(ARCH_CPU_X86_64) && !defined(__clang__))) && \
+ !defined(OS_NACL_NONSFI)
+// If MEMORY_SANITIZER or THREAD_SANITIZER is enabled, it is necessary to call
+// sigaction() here, rather than the direct syscall (sys_sigaction() defined
+// by ourselves).
+// It is because, if MEMORY_SANITIZER or THREAD_SANITIZER is enabled, sigaction
+// is wrapped, and |act->sa_handler| is injected in order to unpoisonize the
+// memory passed via callback's arguments for MEMORY_SANITIZER, or handle
+// signals to check thread consistency for THREAD_SANITIZER. Please see
+// msan_interceptors.cc and tsan_interceptors.cc for more details.
+// So, specifically, if MEMORY_SANITIZER is enabled while the direct syscall is
+// used, as MEMORY_SANITIZER does not know about it, sigaction() invocation in
+// other places would be broken (in more precise, returned |oldact| would have
+// a broken |sa_handler| callback).
+// Practically, it would break NaCl's signal handler installation.
+// cf) native_client/src/trusted/service_runtime/linux/nacl_signal.c.
+// As for THREAD_SANITIZER, the intercepted signal handlers are processed more
+// in other libc functions' interceptors (such as for raise()), so that it
+// would not work properly.
+//
+// Also on x86_64 architecture, we need naked function for rt_sigreturn.
+// However, there is no simple way to define it with GCC. Note that the body
+// of function is actually very small (only two instructions), but we need to
+// define much debug information in addition, otherwise backtrace() used by
+// base::StackTrace would not work so that some tests would fail.
+//
+// When this is built with PNaCl toolchain, we should always use sys_sigaction
+// below, because sigaction() provided by the toolchain is incompatible with
+// Linux's ABI. So, otherwise, it would just fail. Note that it is not
+// necessary to think about sigaction() invocation in other places even with
+// MEMORY_SANITIZER or THREAD_SANITIZER, because it would just fail there.
+// Variant used when the preprocessor condition above holds: forwards to the
+// C library's sigaction() unchanged.
+int sys_sigaction(int signum,
+ const struct sigaction* act,
+ struct sigaction* oldact) {
+ return sigaction(signum, act, oldact);
+}
+#else
+// struct sigaction is different ABI from the Linux's.
+// This is the structure handed to the rt_sigaction system call by
+// sys_sigaction() below.
+struct KernelSigAction {
+ // Signal handler, copied from / to sigaction::sa_handler.
+ void (*kernel_handler)(int);
+ // SA_* flags.
+ uint32_t sa_flags;
+ // Restorer function; set to sys_rt_sigreturn on x86_64 when the caller did
+ // not pass SA_RESTORER.
+ void (*sa_restorer)(void);
+ // Signal mask, always 64 bits wide here.
+ uint64_t sa_mask;
+};
+
+// On X86_64 arch, it is necessary to set sa_restorer always.
+#if defined(ARCH_CPU_X86_64)
+// Fallback definition for toolchains whose headers do not define SA_RESTORER.
+#if !defined(SA_RESTORER)
+#define SA_RESTORER 0x04000000
+#endif
+
+// rt_sigreturn is a special system call that interacts with the user land
+// stack. Thus, here prologue must not be created, which implies syscall()
+// does not work properly, too. Note that rt_sigreturn will never return.
+// sys_sigaction() below installs this function as sa_restorer.
+static __attribute__((naked)) void sys_rt_sigreturn() {
+ // Just invoke rt_sigreturn system call.
+ asm volatile ("syscall\n"
+ :: "a"(__NR_rt_sigreturn));
+}
+#endif
+
+// Installs |act| (when non-null) for |signum| through a direct rt_sigaction
+// system call and, on success, reports the previous action through |oldact|
+// (when non-null). The user-space struct sigaction is converted to and from
+// KernelSigAction. Returns the result of the system call.
+int sys_sigaction(int signum,
+                  const struct sigaction* act,
+                  struct sigaction* oldact) {
+  KernelSigAction kernel_act = {};
+  if (act) {
+    kernel_act.kernel_handler = act->sa_handler;
+    // The two mask types may differ in size; copy only the common prefix.
+    std::memcpy(&kernel_act.sa_mask, &act->sa_mask,
+                std::min(sizeof(kernel_act.sa_mask), sizeof(act->sa_mask)));
+    kernel_act.sa_flags = act->sa_flags;
+
+#if defined(ARCH_CPU_X86_64)
+    // x86_64 always needs a restorer; supply one unless the caller did.
+    if (!(kernel_act.sa_flags & SA_RESTORER)) {
+      kernel_act.sa_flags |= SA_RESTORER;
+      kernel_act.sa_restorer = sys_rt_sigreturn;
+    }
+#endif
+  }
+
+  KernelSigAction kernel_oldact = {};
+  // The last argument is the size of the kernel's signal mask (64 bits).
+  int result = syscall(__NR_rt_sigaction, signum, act ? &kernel_act : nullptr,
+                       oldact ? &kernel_oldact : nullptr, sizeof(uint64_t));
+  if (result == 0 && oldact) {
+    oldact->sa_handler = kernel_oldact.kernel_handler;
+    // Clear first, so that any part of the mask not covered by the copy below
+    // is well defined.
+    sigemptyset(&oldact->sa_mask);
+    // Size the copy from the objects actually involved (|kernel_oldact| and
+    // |oldact|), not from the input-side |kernel_act| / |act|.
+    std::memcpy(
+        &oldact->sa_mask, &kernel_oldact.sa_mask,
+        std::min(sizeof(kernel_oldact.sa_mask), sizeof(oldact->sa_mask)));
+    oldact->sa_flags = kernel_oldact.sa_flags;
+  }
+  return result;
+}
+
+#endif // defined(MEMORY_SANITIZER)
+
+} // namespace sandbox
diff --git a/sandbox/linux/services/syscall_wrappers.h b/sandbox/linux/services/syscall_wrappers.h
new file mode 100644
index 0000000000..581425a367
--- /dev/null
+++ b/sandbox/linux/services/syscall_wrappers.h
@@ -0,0 +1,83 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SERVICES_SYSCALL_WRAPPERS_H_
+#define SANDBOX_LINUX_SERVICES_SYSCALL_WRAPPERS_H_
+
+#include <signal.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "sandbox/sandbox_export.h"
+
+struct sock_fprog;
+struct rlimit64;
+struct cap_hdr;
+struct cap_data;
+
+namespace sandbox {
+
+// Provide direct system call wrappers for a few common system calls.
+// These are guaranteed to perform a system call and do not rely on things such
+// as caching the current pid (c.f. getpid()) unless otherwise specified.
+
+SANDBOX_EXPORT pid_t sys_getpid(void);
+
+SANDBOX_EXPORT pid_t sys_gettid(void);
+
+SANDBOX_EXPORT long sys_clone(unsigned long flags);
+
+// |regs| is not supported and must be passed as nullptr. |child_stack| must be
+// nullptr, since otherwise this function cannot safely return. As a
+// consequence, this function does not support CLONE_VM.
+SANDBOX_EXPORT long sys_clone(unsigned long flags,
+ decltype(nullptr) child_stack,
+ pid_t* ptid,
+ pid_t* ctid,
+ decltype(nullptr) regs);
+
+SANDBOX_EXPORT void sys_exit_group(int status);
+
+// The official system call takes |args| as void* (in order to be extensible),
+// but add more typing for the cases that are currently used.
+SANDBOX_EXPORT int sys_seccomp(unsigned int operation,
+ unsigned int flags,
+ const struct sock_fprog* args);
+
+// Some libcs do not expose a prlimit64 wrapper.
+SANDBOX_EXPORT int sys_prlimit64(pid_t pid,
+ int resource,
+ const struct rlimit64* new_limit,
+ struct rlimit64* old_limit);
+
+// Some libcs do not expose capget/capset wrappers. We want to use these
+// directly in order to avoid pulling in libcap2.
+SANDBOX_EXPORT int sys_capget(struct cap_hdr* hdrp, struct cap_data* datap);
+SANDBOX_EXPORT int sys_capset(struct cap_hdr* hdrp,
+ const struct cap_data* datap);
+
+// Some libcs do not expose getresuid/getresgid wrappers.
+SANDBOX_EXPORT int sys_getresuid(uid_t* ruid, uid_t* euid, uid_t* suid);
+SANDBOX_EXPORT int sys_getresgid(gid_t* rgid, gid_t* egid, gid_t* sgid);
+
+// Some libcs do not expose a chroot wrapper.
+SANDBOX_EXPORT int sys_chroot(const char* path);
+
+// Some libcs do not expose an unshare wrapper.
+SANDBOX_EXPORT int sys_unshare(int flags);
+
+// Some libcs do not expose a sigprocmask wrapper. Note that oldset must be
+// nullptr, because of an ABI gap between the toolchain's and Linux's.
+SANDBOX_EXPORT int sys_sigprocmask(int how,
+ const sigset_t* set,
+ decltype(nullptr) oldset);
+
+// Some libcs do not expose a sigaction().
+SANDBOX_EXPORT int sys_sigaction(int signum,
+ const struct sigaction* act,
+ struct sigaction* oldact);
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SERVICES_SYSCALL_WRAPPERS_H_
diff --git a/sandbox/linux/services/syscall_wrappers_unittest.cc b/sandbox/linux/services/syscall_wrappers_unittest.cc
new file mode 100644
index 0000000000..249d9ae1da
--- /dev/null
+++ b/sandbox/linux/services/syscall_wrappers_unittest.cc
@@ -0,0 +1,99 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/syscall_wrappers.h"
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <cstring>
+
+#include "base/logging.h"
+#include "base/posix/eintr_wrapper.h"
+#include "build/build_config.h"
+#include "sandbox/linux/system_headers/linux_signal.h"
+#include "sandbox/linux/tests/test_utils.h"
+#include "sandbox/linux/tests/unit_tests.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/valgrind/valgrind.h"
+
+namespace sandbox {
+
+namespace {
+
+TEST(SyscallWrappers, BasicSyscalls) {  // sys_getpid() must match getpid().
+ EXPECT_EQ(getpid(), sys_getpid());
+}
+
+TEST(SyscallWrappers, CloneBasic) {  // sys_clone(SIGCHLD) yields a child pid.
+ pid_t child = sys_clone(SIGCHLD);
+ TestUtils::HandlePostForkReturn(child);  // presumably ends the child; confirm.
+ EXPECT_LT(0, child);
+}
+
+TEST(SyscallWrappers, CloneParentSettid) {  // |ptid| must get the child's id.
+ pid_t ptid = 0;
+ pid_t child = sys_clone(CLONE_PARENT_SETTID | SIGCHLD, nullptr, &ptid,
+ nullptr, nullptr);
+ TestUtils::HandlePostForkReturn(child);
+ EXPECT_LT(0, child);
+ EXPECT_EQ(child, ptid);
+}
+
+TEST(SyscallWrappers, CloneChildSettid) {
+ pid_t ctid = 0;
+ pid_t pid =
+ sys_clone(CLONE_CHILD_SETTID | SIGCHLD, nullptr, nullptr, &ctid, nullptr);
+
+ const int kSuccessExit = 0;
+ if (0 == pid) {
+ // In child: succeed only if |ctid| holds this process's own id.
+ if (sys_getpid() == ctid)
+ _exit(kSuccessExit);
+ _exit(1);
+ }
+ // In parent: the child's exit status carries the result of its check.
+ ASSERT_NE(-1, pid);
+ int status = 0;
+ ASSERT_EQ(pid, HANDLE_EINTR(waitpid(pid, &status, 0)));
+ ASSERT_TRUE(WIFEXITED(status));
+ EXPECT_EQ(kSuccessExit, WEXITSTATUS(status));
+}
+
+TEST(SyscallWrappers, GetRESUid) {  // Wrapper and libc must report equal uids.
+ uid_t ruid, euid, suid;
+ uid_t sys_ruid, sys_euid, sys_suid;
+ ASSERT_EQ(0, getresuid(&ruid, &euid, &suid));
+ ASSERT_EQ(0, sys_getresuid(&sys_ruid, &sys_euid, &sys_suid));
+ EXPECT_EQ(ruid, sys_ruid);
+ EXPECT_EQ(euid, sys_euid);
+ EXPECT_EQ(suid, sys_suid);
+}
+
+TEST(SyscallWrappers, GetRESGid) {  // Wrapper and libc must report equal gids.
+ gid_t rgid, egid, sgid;
+ gid_t sys_rgid, sys_egid, sys_sgid;
+ ASSERT_EQ(0, getresgid(&rgid, &egid, &sgid));
+ ASSERT_EQ(0, sys_getresgid(&sys_rgid, &sys_egid, &sys_sgid));
+ EXPECT_EQ(rgid, sys_rgid);
+ EXPECT_EQ(egid, sys_egid);
+ EXPECT_EQ(sgid, sys_sgid);
+}
+
+TEST(SyscallWrappers, LinuxSigSet) {  // sigset_t must start with kernel bits.
+ sigset_t sigset;
+ ASSERT_EQ(0, sigemptyset(&sigset));
+ ASSERT_EQ(0, sigaddset(&sigset, LINUX_SIGSEGV));
+ ASSERT_EQ(0, sigaddset(&sigset, LINUX_SIGBUS));
+ uint64_t linux_sigset = 0;  // First (up to) 8 bytes of the libc sigset_t.
+ std::memcpy(&linux_sigset, &sigset,
+ std::min(sizeof(sigset), sizeof(linux_sigset)));
+ EXPECT_EQ((1ULL << (LINUX_SIGSEGV - 1)) | (1ULL << (LINUX_SIGBUS - 1)),
+ linux_sigset);
+}
+
+} // namespace
+
+} // namespace sandbox
diff --git a/sandbox/linux/services/thread_helpers.cc b/sandbox/linux/services/thread_helpers.cc
new file mode 100644
index 0000000000..80766a9bc5
--- /dev/null
+++ b/sandbox/linux/services/thread_helpers.cc
@@ -0,0 +1,157 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/thread_helpers.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "base/bind.h"
+#include "base/callback.h"
+#include "base/files/scoped_file.h"
+#include "base/logging.h"
+#include "base/posix/eintr_wrapper.h"
+#include "base/strings/string_number_conversions.h"
+#include "base/threading/platform_thread.h"
+#include "base/threading/thread.h"
+#include "sandbox/linux/services/proc_util.h"
+
+namespace sandbox {
+
+namespace {
+
+const char kAssertSingleThreadedError[] =
+ "Current process is not mono-threaded!";
+
+bool IsSingleThreadedImpl(int proc_fd) {  // stat()s "self/task/" via |proc_fd|.
+ CHECK_LE(0, proc_fd);
+ struct stat task_stat;
+ int fstat_ret = fstatat(proc_fd, "self/task/", &task_stat, 0);
+ PCHECK(0 == fstat_ret);
+
+ // At least "..", "." and the current thread should be present.
+ CHECK_LE(3UL, task_stat.st_nlink);
+ // Counting threads via /proc/self/task could be racy. For the purpose of
+ // determining if the current process is monothreaded it works: if at any
+ // time it becomes monothreaded, it'll stay so.
+ return task_stat.st_nlink == 3;
+}
+
+// Returns true if |thread_id_dir_str|, resolved relative to |proc_fd|, can be
+// stat'ed. A failure other than ENOENT is fatal.
+bool IsThreadPresentInProcFS(int proc_fd,
+                             const std::string& thread_id_dir_str) {
+  struct stat dir_stat;
+  if (fstatat(proc_fd, thread_id_dir_str.c_str(), &dir_stat, 0) >= 0)
+    return true;
+  // The only acceptable failure is that the entry does not exist.
+  PCHECK(ENOENT == errno);
+  return false;
+}
+
+// Runs |cb| in a loop until it returns false. After every run that returns
+// true, sleeps for an exponentially increasing amount of time. |cb| is
+// expected to return false very quickly; if it has not done so within ~64ms on
+// Debug builds (2s on Release builds), this crashes via LOG(FATAL).
+// This is guaranteed to not sleep more than twice as much as the bare minimum
+// amount of time.
+void RunWhileTrue(const base::Callback<bool(void)>& cb) {
+#if defined(NDEBUG)
+ // In Release mode, crash after 30 iterations, which means having spent
+ // roughly 2s in nanosleep(2) cumulatively. NOTE(review): the requested
+ // sleeps actually sum to 2^30 - 1 ns (~1.07s) -- confirm the intended bound.
+ const unsigned int kMaxIterations = 30U;
+#else
+ // In practice, this never goes through more than a couple of iterations. In
+ // debug mode, crash after 64ms (+ eventually 25 times the granularity of
+ // the clock) in nanosleep(2), so that this does not become too slow.
+ // NOTE(review): 25 requested sleeps sum to 2^25 - 1 ns (~34ms) -- confirm.
+ const unsigned int kMaxIterations = 25U;
+#endif
+
+ // Run |cb| with an exponential back-off, sleeping 2^iterations nanoseconds
+ // in nanosleep(2).
+ // Note: the clock may not allow for nanosecond granularity; in this case the
+ // first iterations would sleep a tiny bit more instead, which would not
+ // change the calculations significantly.
+ for (unsigned int i = 0; i < kMaxIterations; ++i) {
+ if (!cb.Run()) {
+ return;
+ }
+
+ // Increase the waiting time exponentially.
+ struct timespec ts = {0, 1L << i /* nanoseconds */};
+ PCHECK(0 == HANDLE_EINTR(nanosleep(&ts, &ts)));
+ }
+
+ LOG(FATAL) << kAssertSingleThreadedError << " (iterations: " << kMaxIterations
+ << ")";
+
+ NOTREACHED();
+}
+
+bool IsMultiThreaded(int proc_fd) {  // Negation, bound as a RunWhileTrue() cb.
+ return !ThreadHelpers::IsSingleThreaded(proc_fd);
+}
+
+} // namespace
+
+// static -- the actual check lives in IsSingleThreadedImpl().
+bool ThreadHelpers::IsSingleThreaded(int proc_fd) {
+ DCHECK_LE(0, proc_fd);
+ return IsSingleThreadedImpl(proc_fd);
+}
+
+// static -- same check, using a descriptor from ProcUtil::OpenProc().
+bool ThreadHelpers::IsSingleThreaded() {
+ base::ScopedFD task_fd(ProcUtil::OpenProc());
+ return IsSingleThreaded(task_fd.get());
+}
+
+// static
+void ThreadHelpers::AssertSingleThreaded(int proc_fd) {
+ DCHECK_LE(0, proc_fd);
+ const base::Callback<bool(void)> cb = base::Bind(&IsMultiThreaded, proc_fd);
+ RunWhileTrue(cb);  // Retries with back-off; fatal if still multi-threaded.
+}
+
+void ThreadHelpers::AssertSingleThreaded() {  // static
+ base::ScopedFD task_fd(ProcUtil::OpenProc());
+ AssertSingleThreaded(task_fd.get());
+}
+
+// static
+bool ThreadHelpers::StopThreadAndWatchProcFS(int proc_fd,
+ base::Thread* thread) {
+ DCHECK_LE(0, proc_fd);
+ DCHECK(thread);
+ const base::PlatformThreadId thread_id = thread->thread_id();
+ const std::string thread_id_dir_str =
+ "self/task/" + base::IntToString(thread_id) + "/";
+
+ // The kernel is at liberty to wake the thread id futex before updating
+ // /proc. Following Stop(), the thread is joined, but entries in /proc may
+ // not have been updated.
+ thread->Stop();
+
+ const base::Callback<bool(void)> cb =
+ base::Bind(&IsThreadPresentInProcFS, proc_fd, thread_id_dir_str);
+ // Poll with back-off until the thread's /proc entry is gone.
+ RunWhileTrue(cb);
+
+ return true;  // RunWhileTrue() is fatal on timeout, so this is success.
+}
+
+// static -- exposes the message that RunWhileTrue() logs before crashing.
+const char* ThreadHelpers::GetAssertSingleThreadedErrorMessageForTests() {
+ return kAssertSingleThreadedError;
+}
+
+} // namespace sandbox
diff --git a/sandbox/linux/services/thread_helpers.h b/sandbox/linux/services/thread_helpers.h
new file mode 100644
index 0000000000..f4abdffd03
--- /dev/null
+++ b/sandbox/linux/services/thread_helpers.h
@@ -0,0 +1,43 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SERVICES_THREAD_HELPERS_H_
+#define SANDBOX_LINUX_SERVICES_THREAD_HELPERS_H_
+
+#include "base/macros.h"
+#include "sandbox/sandbox_export.h"
+
+namespace base { class Thread; }
+
+namespace sandbox {
+
+class SANDBOX_EXPORT ThreadHelpers {
+ public:
+ // Check whether the current process is single threaded. |proc_fd|
+ // must be a file descriptor to /proc/ and remains owned by the
+ // caller.
+ static bool IsSingleThreaded(int proc_fd);
+ static bool IsSingleThreaded();  // Same, using ProcUtil::OpenProc().
+
+ // Crash if the current process is not single threaded. This will wait
+ // on /proc to be updated. In the case where this doesn't crash, this will
+ // return promptly. In the case where this does crash, this will first wait
+ // for a few ms in Debug mode, a few seconds in Release mode.
+ static void AssertSingleThreaded(int proc_fd);
+ static void AssertSingleThreaded();  // Same, using ProcUtil::OpenProc().
+
+ // Stop |thread| and wait until it no longer has an entry in
+ // /proc/self/task/ from the point of view of the current thread. This is
+ // the way to stop threads before calling IsSingleThreaded(). Returns true.
+ static bool StopThreadAndWatchProcFS(int proc_fd, base::Thread* thread);
+ // Returns the message logged when AssertSingleThreaded() crashes.
+ static const char* GetAssertSingleThreadedErrorMessageForTests();
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(ThreadHelpers);
+};
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SERVICES_THREAD_HELPERS_H_
diff --git a/sandbox/linux/services/thread_helpers_unittests.cc b/sandbox/linux/services/thread_helpers_unittests.cc
new file mode 100644
index 0000000000..7357a0cfa7
--- /dev/null
+++ b/sandbox/linux/services/thread_helpers_unittests.cc
@@ -0,0 +1,147 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/thread_helpers.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "base/logging.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/posix/eintr_wrapper.h"
+#include "base/process/process_metrics.h"
+#include "base/threading/platform_thread.h"
+#include "base/threading/thread.h"
+#include "build/build_config.h"
+#include "sandbox/linux/tests/unit_tests.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+using base::PlatformThread;
+
+namespace sandbox {
+
+namespace {
+
+// These tests fail under ThreadSanitizer, see http://crbug.com/342305
+#if !defined(THREAD_SANITIZER)
+
+// Number of start/stop cycles the race tests in this file run. Valgrind
+// runs get only a couple of iterations; everything else gets a thousand.
+int GetRaceTestIterations() {
+  const int kValgrindIterations = 2;
+  const int kDefaultIterations = 1000;
+  return IsRunningOnValgrind() ? kValgrindIterations : kDefaultIterations;
+}
+
+class ScopedProc {  // Holds an open descriptor to /proc/ for its lifetime.
+ public:
+ ScopedProc() : fd_(-1) {
+ fd_ = open("/proc/", O_RDONLY | O_DIRECTORY);
+ CHECK_LE(0, fd_);
+ }
+
+ ~ScopedProc() { PCHECK(0 == IGNORE_EINTR(close(fd_))); }
+
+ int fd() { return fd_; }  // Still owned by this object.
+
+ private:
+ int fd_;
+ DISALLOW_COPY_AND_ASSIGN(ScopedProc);
+};
+
+TEST(ThreadHelpers, IsSingleThreadedBasic) {  // Checks both overloads.
+ ScopedProc proc_fd;
+ ASSERT_TRUE(ThreadHelpers::IsSingleThreaded(proc_fd.fd()));
+ ASSERT_TRUE(ThreadHelpers::IsSingleThreaded());
+
+ base::Thread thread("sandbox_tests");
+ ASSERT_TRUE(thread.Start());
+ ASSERT_FALSE(ThreadHelpers::IsSingleThreaded(proc_fd.fd()));
+ ASSERT_FALSE(ThreadHelpers::IsSingleThreaded());
+ // Explicitly stop the thread here to not pollute the next test.
+ ASSERT_TRUE(ThreadHelpers::StopThreadAndWatchProcFS(proc_fd.fd(), &thread));
+}
+
+SANDBOX_TEST(ThreadHelpers, AssertSingleThreaded) {
+ ScopedProc proc_fd;
+ SANDBOX_ASSERT(ThreadHelpers::IsSingleThreaded(proc_fd.fd()));
+ SANDBOX_ASSERT(ThreadHelpers::IsSingleThreaded());
+ // With no extra thread started, neither overload may crash.
+ ThreadHelpers::AssertSingleThreaded(proc_fd.fd());
+ ThreadHelpers::AssertSingleThreaded();
+}
+
+TEST(ThreadHelpers, IsSingleThreadedIterated) {
+ ScopedProc proc_fd;
+ ASSERT_TRUE(ThreadHelpers::IsSingleThreaded(proc_fd.fd()));
+
+ // Iterate to check for race conditions.
+ for (int i = 0; i < GetRaceTestIterations(); ++i) {
+ base::Thread thread("sandbox_tests");  // A fresh thread object each time.
+ ASSERT_TRUE(thread.Start());
+ ASSERT_FALSE(ThreadHelpers::IsSingleThreaded(proc_fd.fd()));
+ // Explicitly stop the thread here to not pollute the next iteration.
+ ASSERT_TRUE(ThreadHelpers::StopThreadAndWatchProcFS(proc_fd.fd(), &thread));
+ }
+}
+
+TEST(ThreadHelpers, IsSingleThreadedStartAndStop) {
+ ScopedProc proc_fd;
+ ASSERT_TRUE(ThreadHelpers::IsSingleThreaded(proc_fd.fd()));
+
+ base::Thread thread("sandbox_tests");
+ // This is testing for a race condition, so iterate.
+ // Manually, this has been tested with more than 1M iterations.
+ for (int i = 0; i < GetRaceTestIterations(); ++i) {
+ ASSERT_TRUE(thread.Start());
+ ASSERT_FALSE(ThreadHelpers::IsSingleThreaded(proc_fd.fd()));
+
+ ASSERT_TRUE(ThreadHelpers::StopThreadAndWatchProcFS(proc_fd.fd(), &thread));
+ ASSERT_TRUE(ThreadHelpers::IsSingleThreaded(proc_fd.fd()));
+ ASSERT_EQ(1, base::GetNumberOfThreads(base::GetCurrentProcessHandle()));
+ }
+}
+
+SANDBOX_TEST(ThreadHelpers, AssertSingleThreadedAfterThreadStopped) {
+ SANDBOX_ASSERT(ThreadHelpers::IsSingleThreaded());
+
+ base::Thread thread1("sandbox_tests");
+ base::Thread thread2("sandbox_tests");
+ // Start and stop two threads repeatedly to look for races.
+ for (int i = 0; i < GetRaceTestIterations(); ++i) {
+ SANDBOX_ASSERT(thread1.Start());
+ SANDBOX_ASSERT(thread2.Start());
+ SANDBOX_ASSERT(!ThreadHelpers::IsSingleThreaded());
+
+ thread1.Stop();
+ thread2.Stop();
+ // This will wait on /proc/ to reflect the state of threads in the
+ // process.
+ ThreadHelpers::AssertSingleThreaded();
+ SANDBOX_ASSERT(ThreadHelpers::IsSingleThreaded());
+ }
+}
+
+// Only run this test in Debug mode, where AssertSingleThreaded() will return
+// in less than 64ms.
+#if !defined(NDEBUG)
+SANDBOX_DEATH_TEST(
+ ThreadHelpers,
+ AssertSingleThreadedDies,
+ DEATH_MESSAGE(
+ ThreadHelpers::GetAssertSingleThreadedErrorMessageForTests())) {
+ base::Thread thread1("sandbox_tests");
+ SANDBOX_ASSERT(thread1.Start());
+ ThreadHelpers::AssertSingleThreaded();  // thread1 is still running here.
+}
+#endif // !defined(NDEBUG)
+
+#endif // !defined(THREAD_SANITIZER)
+
+} // namespace
+
+} // namespace sandbox
diff --git a/sandbox/linux/services/yama.cc b/sandbox/linux/services/yama.cc
new file mode 100644
index 0000000000..151f4bd340
--- /dev/null
+++ b/sandbox/linux/services/yama.cc
@@ -0,0 +1,115 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/services/yama.h"
+
+#include <fcntl.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "base/files/file_util.h"
+#include "base/files/scoped_file.h"
+#include "base/logging.h"
+#include "base/posix/eintr_wrapper.h"
+
+#if !defined(PR_SET_PTRACER_ANY)
+#define PR_SET_PTRACER_ANY ((unsigned long)-1)
+#endif
+
+#if !defined(PR_SET_PTRACER)
+#define PR_SET_PTRACER 0x59616d61
+#endif
+
+namespace sandbox {
+
+namespace {
+
+// Enable or disable the Yama ptracer restrictions for the current process
+// through prctl(PR_SET_PTRACER).
+// Return false if Yama is not present on this kernel, which is assumed when
+// prctl() fails with ENOSYS or EINVAL. Any other failure is fatal.
+bool SetYamaPtracersRestriction(bool enable_restrictions) {
+  // 0 is passed when enabling restrictions, PR_SET_PTRACER_ANY otherwise.
+  const unsigned long set_ptracer_arg =
+      enable_restrictions ? 0UL : PR_SET_PTRACER_ANY;
+
+  const int ret = prctl(PR_SET_PTRACER, set_ptracer_arg);
+  // Capture errno immediately after the call.
+  const int prctl_errno = errno;
+
+  // Success: the kernel accepted the request.
+  if (0 == ret)
+    return true;
+
+  // ENOSYS or EINVAL means Yama is not in the current kernel.
+  CHECK(ENOSYS == prctl_errno || EINVAL == prctl_errno);
+  return false;
+}
+
+// Tells whether "/proc/sys/kernel/" exists from this process's point of
+// view, as reported by access(2) with F_OK.
+bool CanAccessProcFS() {
+  static const char kProcfsKernelSysPath[] = "/proc/sys/kernel/";
+  // access() returns 0 on success and non-zero otherwise.
+  const bool accessible = (0 == access(kProcfsKernelSysPath, F_OK));
+  return accessible;
+}
+
+} // namespace
+
+// static -- false means the prctl() failed with ENOSYS/EINVAL (no Yama).
+bool Yama::RestrictPtracersToAncestors() {
+ return SetYamaPtracersRestriction(true /* enable_restrictions */);
+}
+
+// static -- false means the prctl() failed with ENOSYS/EINVAL (no Yama).
+bool Yama::DisableYamaRestrictions() {
+ return SetYamaPtracersRestriction(false /* enable_restrictions */);
+}
+
+// static -- returns a bitmask of GlobalStatus values, or 0 if unknown.
+int Yama::GetStatus() {
+ if (!CanAccessProcFS()) {
+ return 0;  // /proc/sys/kernel/ is not reachable: status unknown.
+ }
+
+ static const char kPtraceScopePath[] = "/proc/sys/kernel/yama/ptrace_scope";
+
+ base::ScopedFD yama_scope(HANDLE_EINTR(open(kPtraceScopePath, O_RDONLY)));
+
+ if (!yama_scope.is_valid()) {
+ const int open_errno = errno;
+ DCHECK(ENOENT == open_errno);
+ // The status is known, yama is not present.
+ return STATUS_KNOWN;
+ }
+
+ char yama_scope_value = 0;
+ ssize_t num_read = HANDLE_EINTR(read(yama_scope.get(), &yama_scope_value, 1));
+ PCHECK(1 == num_read);
+ // Only the first byte of ptrace_scope is inspected.
+ switch (yama_scope_value) {
+ case '0':
+ return STATUS_KNOWN | STATUS_PRESENT;
+ case '1':
+ return STATUS_KNOWN | STATUS_PRESENT | STATUS_ENFORCING;
+ case '2':
+ case '3':
+ return STATUS_KNOWN | STATUS_PRESENT | STATUS_ENFORCING |
+ STATUS_STRICT_ENFORCING;
+ default:
+ NOTREACHED();
+ return 0;
+ }
+}
+
+// static
+bool Yama::IsPresent() { return (GetStatus() & STATUS_PRESENT) != 0; }
+
+// static
+bool Yama::IsEnforcing() { return (GetStatus() & STATUS_ENFORCING) != 0; }
+
+} // namespace sandbox
diff --git a/sandbox/linux/services/yama.h b/sandbox/linux/services/yama.h
new file mode 100644
index 0000000000..e6c5c45b2a
--- /dev/null
+++ b/sandbox/linux/services/yama.h
@@ -0,0 +1,57 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SERVICES_YAMA_H_
+#define SANDBOX_LINUX_SERVICES_YAMA_H_
+
+#include "base/macros.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+
+// Yama is an LSM kernel module which can restrict ptrace().
+// This class provides ways to detect if Yama is present and enabled
+// and to restrict which processes can ptrace the current process.
+class SANDBOX_EXPORT Yama {
+ public:
+ // This enum should be used to set or check a bitmask.
+ // A value of 0 would indicate that the status is not known.
+ enum GlobalStatus {
+ STATUS_KNOWN = 1 << 0,
+ STATUS_PRESENT = 1 << 1,
+ STATUS_ENFORCING = 1 << 2,
+ // STATUS_STRICT_ENFORCING corresponds to either mode 2 or mode 3 of Yama.
+ // Ptrace could be entirely denied, or restricted to CAP_SYS_PTRACE
+ // and PTRACE_TRACEME.
+ STATUS_STRICT_ENFORCING = 1 << 3
+ };
+
+ // Restrict who can ptrace() the current process to its ancestors.
+ // If this succeeds, then Yama is available on this kernel.
+ // However, Yama may not be enforcing at this time.
+ static bool RestrictPtracersToAncestors();
+
+ // Disable Yama restrictions for the current process.
+ // This will fail if Yama is not available on this kernel.
+ // This is meant for testing only. If you need this, implement
+ // a per-pid authorization instead.
+ static bool DisableYamaRestrictions();
+
+ // Returns a bitmask of GlobalStatus values describing Yama for the machine
+ // (not the current process), or 0 if unknown. This requires access to the
+ // filesystem and will use /proc/sys/kernel/yama/ptrace_scope.
+ static int GetStatus();
+
+ // Helper for checking for STATUS_PRESENT in GetStatus().
+ static bool IsPresent();
+ // Helper for checking for STATUS_ENFORCING in GetStatus().
+ static bool IsEnforcing();
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Yama);
+};
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SERVICES_YAMA_H_
diff --git a/sandbox/linux/services/yama_unittests.cc b/sandbox/linux/services/yama_unittests.cc
new file mode 100644
index 0000000000..204cfd6a44
--- /dev/null
+++ b/sandbox/linux/services/yama_unittests.cc
@@ -0,0 +1,172 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/ptrace.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "base/bind.h"
+#include "base/compiler_specific.h"
+#include "base/posix/eintr_wrapper.h"
+#include "base/strings/string_util.h"
+#include "base/sys_info.h"
+#include "sandbox/linux/services/scoped_process.h"
+#include "sandbox/linux/services/yama.h"
+#include "sandbox/linux/tests/unit_tests.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace sandbox {
+
+namespace {
+
+bool HasLinux32Bug() {  // Always false unless compiled for __i386__.
+#if defined(__i386__)
+ // On 3.2 kernels, yama doesn't work for 32-bit binaries on 64-bit kernels.
+ // This is fixed in 3.4.
+ bool is_kernel_64bit =
+ base::SysInfo::OperatingSystemArchitecture() == "x86_64";
+ bool is_linux = base::SysInfo::OperatingSystemName() == "Linux";
+ bool is_3_dot_2 = base::StartsWithASCII(
+ base::SysInfo::OperatingSystemVersion(), "3.2", /*case_sensitive=*/false);
+ if (is_kernel_64bit && is_linux && is_3_dot_2)
+ return true;
+#endif // defined(__i386__)
+ return false;
+}
+
+bool CanPtrace(pid_t pid) {  // True if PTRACE_ATTACH to |pid| succeeds.
+ int ret;
+ ret = ptrace(PTRACE_ATTACH, pid, NULL, NULL);
+ if (ret == -1) {
+ CHECK_EQ(EPERM, errno);  // Any failure other than EPERM is fatal.
+ return false;
+ }
+ // Wait for the process to be stopped so that it can be detached.
+ siginfo_t process_info;
+ int wait_ret = HANDLE_EINTR(waitid(P_PID, pid, &process_info, WSTOPPED));
+ PCHECK(0 == wait_ret);
+ PCHECK(0 == ptrace(PTRACE_DETACH, pid, NULL, NULL));
+ return true;
+}
+
+// Ends the calling process with _exit(): status 0 if |pid| can be ptraced by
+// the current process, status 1 otherwise. Never returns.
+void ExitZeroIfCanPtrace(pid_t pid) {
+  const bool can_ptrace = CanPtrace(pid);
+
+  // The exit status is the only channel used to report the outcome.
+  const int exit_status = can_ptrace ? 0 : 1;
+  _exit(exit_status);
+}
+
+bool CanSubProcessPtrace(pid_t pid) {  // Runs the ptrace probe in a subprocess.
+ ScopedProcess process(base::Bind(&ExitZeroIfCanPtrace, pid));
+ bool signaled;
+ int exit_code = process.WaitForExit(&signaled);
+ CHECK(!signaled);
+ return 0 == exit_code;  // ExitZeroIfCanPtrace() exits 0 on success.
+}
+
+// The tests below assume that the system-level configuration will not change
+// while they run.
+
+TEST(Yama, GetStatus) {
+ int status1 = Yama::GetStatus();
+
+ // Check that the value is within the range of possible bitmasks.
+ ASSERT_LE(0, status1);
+ ASSERT_GE(Yama::STATUS_KNOWN | Yama::STATUS_PRESENT | Yama::STATUS_ENFORCING |
+ Yama::STATUS_STRICT_ENFORCING,
+ status1);
+
+ // The status should not just be a random value.
+ int status2 = Yama::GetStatus();
+ EXPECT_EQ(status1, status2);
+
+ // This test is not running sandboxed, so there is no reason to not know
+ // the status.
+ EXPECT_NE(0, Yama::STATUS_KNOWN & status1);
+
+ if (status1 & Yama::STATUS_STRICT_ENFORCING) {
+ // If Yama is strictly enforcing, it is also enforcing.
+ EXPECT_TRUE(status1 & Yama::STATUS_ENFORCING);
+ }
+
+ if (status1 & Yama::STATUS_ENFORCING) {
+ // If Yama is enforcing, Yama is present.
+ EXPECT_NE(0, status1 & Yama::STATUS_PRESENT);
+ }
+
+ // Verify that the helper functions work as intended.
+ EXPECT_EQ(static_cast<bool>(status1 & Yama::STATUS_ENFORCING),
+ Yama::IsEnforcing());
+ EXPECT_EQ(static_cast<bool>(status1 & Yama::STATUS_PRESENT),
+ Yama::IsPresent());
+ // Print the detected state to stdout.
+ fprintf(stdout,
+ "Yama present: %s - enforcing: %s\n",
+ Yama::IsPresent() ? "Y" : "N",
+ Yama::IsEnforcing() ? "Y" : "N");
+}
+
+SANDBOX_TEST(Yama, RestrictPtraceSucceedsWhenYamaPresent) {
+ // This call will succeed iff Yama is present.
+ bool restricted = Yama::RestrictPtracersToAncestors();
+ CHECK_EQ(restricted, Yama::IsPresent());  // prctl() and /proc must agree.
+}
+
+// Attempts to enable or disable Yama restrictions; the result is ignored.
+void SetYamaRestrictions(bool enable_restriction) {
+  if (!enable_restriction) {
+    Yama::DisableYamaRestrictions();
+    return;
+  }
+  Yama::RestrictPtracersToAncestors();
+}
+
+TEST(Yama, RestrictPtraceWorks) {
+ if (HasLinux32Bug())
+ return;
+ // process1 asks for ptracers to be restricted to its ancestors.
+ ScopedProcess process1(base::Bind(&SetYamaRestrictions, true));
+ ASSERT_TRUE(process1.WaitForClosureToRun());
+
+ if (Yama::IsEnforcing()) {
+ // A sibling process cannot ptrace process1.
+ ASSERT_FALSE(CanSubProcessPtrace(process1.GetPid()));
+ }
+
+ if (!(Yama::GetStatus() & Yama::STATUS_STRICT_ENFORCING)) {
+ // However, parent can ptrace process1.
+ ASSERT_TRUE(CanPtrace(process1.GetPid()));
+
+ // A sibling can ptrace process2 which disables any Yama protection.
+ ScopedProcess process2(base::Bind(&SetYamaRestrictions, false));
+ ASSERT_TRUE(process2.WaitForClosureToRun());
+ ASSERT_TRUE(CanSubProcessPtrace(process2.GetPid()));
+ }
+}
+
+void DoNothing() {}  // No-op closure body for ScopedProcess.
+
+SANDBOX_TEST(Yama, RestrictPtraceIsDefault) {
+ if (!Yama::IsPresent() || HasLinux32Bug())
+ return;
+ // Lift the restrictions in this process before creating process1.
+ CHECK(Yama::DisableYamaRestrictions());
+ ScopedProcess process1(base::Bind(&DoNothing));
+
+ if (Yama::IsEnforcing()) {
+ // Check that process1 is protected by Yama, even though it has
+ // been created from a process that disabled Yama.
+ CHECK(!CanSubProcessPtrace(process1.GetPid()));
+ }
+}
+
+} // namespace
+
+} // namespace sandbox
diff --git a/sandbox/linux/syscall_broker/broker_channel.cc b/sandbox/linux/syscall_broker/broker_channel.cc
new file mode 100644
index 0000000000..fa0f7615fc
--- /dev/null
+++ b/sandbox/linux/syscall_broker/broker_channel.cc
@@ -0,0 +1,35 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/syscall_broker/broker_channel.h"
+
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "base/logging.h"
+
+namespace sandbox {
+
+namespace syscall_broker {
+
+// static
+void BrokerChannel::CreatePair(EndPoint* reader, EndPoint* writer) {
+ DCHECK(reader);
+ DCHECK(writer);
+ int socket_pair[2];
+ // Use SOCK_SEQPACKET to preserve message boundaries; we also want to be
+ // notified (recvmsg should return and not block) when the connection has
+ // been broken, which could mean that the other end has been closed.
+ PCHECK(0 == socketpair(AF_UNIX, SOCK_SEQPACKET, 0, socket_pair));
+ // Make the channel one-directional: each end keeps a single direction.
+ reader->reset(socket_pair[0]);
+ PCHECK(0 == shutdown(reader->get(), SHUT_WR));
+
+ writer->reset(socket_pair[1]);
+ PCHECK(0 == shutdown(writer->get(), SHUT_RD));
+}
+
+} // namespace syscall_broker
+
+} // namespace sandbox
diff --git a/sandbox/linux/syscall_broker/broker_channel.h b/sandbox/linux/syscall_broker/broker_channel.h
new file mode 100644
index 0000000000..2abdba413a
--- /dev/null
+++ b/sandbox/linux/syscall_broker/broker_channel.h
@@ -0,0 +1,31 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SYSCALL_BROKER_BROKER_CHANNEL_H_
+#define SANDBOX_LINUX_SYSCALL_BROKER_BROKER_CHANNEL_H_
+
+#include "base/files/scoped_file.h"
+#include "base/macros.h"
+
+namespace sandbox {
+
+namespace syscall_broker {
+
+// A small class to create a pipe-like communication channel. It is based on a
+// SOCK_SEQPACKET unix socket, which is connection-based and guaranteed to
+// preserve message boundaries.
+class BrokerChannel {
+ public:
+ typedef base::ScopedFD EndPoint;
+ static void CreatePair(EndPoint* reader, EndPoint* writer);  // Both non-null.
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(BrokerChannel);
+};
+
+} // namespace syscall_broker
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SYSCALL_BROKER_BROKER_CHANNEL_H_
diff --git a/sandbox/linux/syscall_broker/broker_client.cc b/sandbox/linux/syscall_broker/broker_client.cc
new file mode 100644
index 0000000000..760cf59b3c
--- /dev/null
+++ b/sandbox/linux/syscall_broker/broker_client.cc
@@ -0,0 +1,144 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/syscall_broker/broker_client.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "build/build_config.h"
+#include "base/logging.h"
+#include "base/pickle.h"
+#include "base/posix/unix_domain_socket_linux.h"
+#include "sandbox/linux/syscall_broker/broker_channel.h"
+#include "sandbox/linux/syscall_broker/broker_common.h"
+#include "sandbox/linux/syscall_broker/broker_policy.h"
+
+#if defined(OS_ANDROID) && !defined(MSG_CMSG_CLOEXEC)
+#define MSG_CMSG_CLOEXEC 0x40000000
+#endif
+
+namespace sandbox {
+
+namespace syscall_broker {
+
+// Make a remote system call over IPC for syscalls that take a path and flags
+// as arguments, currently open() and access().
+// Will return -errno like a real system call.
+// This function needs to be async signal safe.
+int BrokerClient::PathAndFlagsSyscall(IPCCommand syscall_type,
+ const char* pathname,
+ int flags) const {
+ int recvmsg_flags = 0;
+ RAW_CHECK(syscall_type == COMMAND_OPEN || syscall_type == COMMAND_ACCESS);
+ if (!pathname)
+ return -EFAULT;
+
+ // For this "remote system call" to work, we need to handle any flag that
+ // cannot be sent over a Unix socket in a special way.
+ // See the comments around kCurrentProcessOpenFlagsMask.
+ if (syscall_type == COMMAND_OPEN && (flags & kCurrentProcessOpenFlagsMask)) {
+ // This implementation only knows about O_CLOEXEC, someone needs to look at
+ // this code if other flags are added.
+ RAW_CHECK(kCurrentProcessOpenFlagsMask == O_CLOEXEC);
+ recvmsg_flags |= MSG_CMSG_CLOEXEC;
+ flags &= ~O_CLOEXEC;
+ }
+
+ // There is no point in forwarding a request that we know will be denied.
+ // Of course, the real security check needs to be on the other side of the
+ // IPC.
+ if (fast_check_in_client_) {
+ if (syscall_type == COMMAND_OPEN &&
+ !broker_policy_.GetFileNameIfAllowedToOpen(
+ pathname, flags, NULL /* file_to_open */,
+ NULL /* unlink_after_open */)) {
+ return -broker_policy_.denied_errno();
+ }
+ if (syscall_type == COMMAND_ACCESS &&
+ !broker_policy_.GetFileNameIfAllowedToAccess(pathname, flags, NULL)) {
+ return -broker_policy_.denied_errno();
+ }
+ }
+
+ base::Pickle write_pickle;
+ write_pickle.WriteInt(syscall_type);
+ write_pickle.WriteString(pathname);
+ write_pickle.WriteInt(flags);
+ RAW_CHECK(write_pickle.size() <= kMaxMessageLength);
+
+ int returned_fd = -1;
+ uint8_t reply_buf[kMaxMessageLength];
+
+ // Send a request (in write_pickle) as well that will include a new
+ // temporary socketpair (created internally by SendRecvMsg()).
+ // Then read the reply on this new socketpair in reply_buf and put an
+ // eventual attached file descriptor in |returned_fd|.
+ ssize_t msg_len = base::UnixDomainSocket::SendRecvMsgWithFlags(
+ ipc_channel_.get(), reply_buf, sizeof(reply_buf), recvmsg_flags,
+ &returned_fd, write_pickle);
+ if (msg_len <= 0) {
+ if (!quiet_failures_for_tests_)
+ RAW_LOG(ERROR, "Could not make request to broker process");
+ return -ENOMEM;
+ }
+
+ base::Pickle read_pickle(reinterpret_cast<char*>(reply_buf), msg_len);
+ base::PickleIterator iter(read_pickle);
+ int return_value = -1;
+ // Now deserialize the return value and eventually return the file
+ // descriptor.
+ if (iter.ReadInt(&return_value)) {
+ switch (syscall_type) {
+ case COMMAND_ACCESS:
+ // We should never have a fd to return.
+ RAW_CHECK(returned_fd == -1);
+ return return_value;
+ case COMMAND_OPEN:
+ if (return_value < 0) {
+ RAW_CHECK(returned_fd == -1);
+ return return_value;
+ } else {
+ // We have a real file descriptor to return.
+ RAW_CHECK(returned_fd >= 0);
+ return returned_fd;
+ }
+ default:
+ RAW_LOG(ERROR, "Unsupported command");
+ return -ENOSYS;
+ }
+ } else {
+ RAW_LOG(ERROR, "Could not read pickle");
+ NOTREACHED();
+ return -ENOMEM;
+ }
+}
+
+BrokerClient::BrokerClient(const BrokerPolicy& broker_policy,
+ BrokerChannel::EndPoint ipc_channel,
+ bool fast_check_in_client,
+ bool quiet_failures_for_tests)
+ : broker_policy_(broker_policy),
+ ipc_channel_(ipc_channel.Pass()),
+ fast_check_in_client_(fast_check_in_client),
+ quiet_failures_for_tests_(quiet_failures_for_tests) {
+}
+
+BrokerClient::~BrokerClient() {
+}
+
+int BrokerClient::Access(const char* pathname, int mode) const {
+ return PathAndFlagsSyscall(COMMAND_ACCESS, pathname, mode);
+}
+
+int BrokerClient::Open(const char* pathname, int flags) const {
+ return PathAndFlagsSyscall(COMMAND_OPEN, pathname, flags);
+}
+
+} // namespace syscall_broker
+
+} // namespace sandbox
diff --git a/sandbox/linux/syscall_broker/broker_client.h b/sandbox/linux/syscall_broker/broker_client.h
new file mode 100644
index 0000000000..2dfef8150c
--- /dev/null
+++ b/sandbox/linux/syscall_broker/broker_client.h
@@ -0,0 +1,75 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SYSCALL_BROKER_BROKER_CLIENT_H_
+#define SANDBOX_LINUX_SYSCALL_BROKER_BROKER_CLIENT_H_
+
+#include "base/macros.h"
+#include "sandbox/linux/syscall_broker/broker_channel.h"
+#include "sandbox/linux/syscall_broker/broker_common.h"
+
+namespace sandbox {
+
+namespace syscall_broker {
+
+class BrokerPolicy;
+
+// This class can be embedded in a sandboxed process and can be
+// used to perform certain system calls in another, presumably
+// non-sandboxed process (which embeds BrokerHost).
+// A key feature of this class is the ability to use some of its methods in a
+// thread-safe and async-signal safe way. The goal is to be able to use it to
+// replace the open() or access() system calls happening anywhere in a process
+// (as allowed for instance by seccomp-bpf's SIGSYS mechanism).
+class BrokerClient {
+ public:
+ // |policy| needs to match the policy used by BrokerHost. This
+ // allows predicting some of the requests which will be denied
+ // and saving an IPC round trip.
+ // |ipc_channel| needs to be a suitable SOCK_SEQPACKET unix socket.
+ // |fast_check_in_client| should be set to true and
+ // |quiet_failures_for_tests| to false unless you are writing tests.
+ BrokerClient(const BrokerPolicy& policy,
+ BrokerChannel::EndPoint ipc_channel,
+ bool fast_check_in_client,
+ bool quiet_failures_for_tests);
+ ~BrokerClient();
+
+ // Can be used in place of access().
+ // X_OK will always return an error in practice since the broker process
+ // doesn't support execute permissions.
+ // It's similar to the access() system call and will return -errno on errors.
+ // This is async signal safe.
+ int Access(const char* pathname, int mode) const;
+ // Can be used in place of open().
+ // The implementation only supports certain white listed flags and will
+ // return -EPERM on other flags.
+ // It's similar to the open() system call and will return -errno on errors.
+ // This is async signal safe.
+ int Open(const char* pathname, int flags) const;
+
+ // Get the file descriptor used for IPC. This is used for tests.
+ int GetIPCDescriptor() const { return ipc_channel_.get(); }
+
+ private:
+ const BrokerPolicy& broker_policy_;
+ const BrokerChannel::EndPoint ipc_channel_;
+ const bool fast_check_in_client_; // Whether to check requests against the
+ // policy locally and skip the IPC for ones
+ // we know will be denied. (Off in tests).
+ const bool quiet_failures_for_tests_; // Disable certain error message when
+ // testing for failures.
+
+ int PathAndFlagsSyscall(IPCCommand syscall_type,
+ const char* pathname,
+ int flags) const;
+
+ DISALLOW_COPY_AND_ASSIGN(BrokerClient);
+};
+
+} // namespace syscall_broker
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SYSCALL_BROKER_BROKER_CLIENT_H_
diff --git a/sandbox/linux/syscall_broker/broker_file_permission.cc b/sandbox/linux/syscall_broker/broker_file_permission.cc
new file mode 100644
index 0000000000..beceda93f5
--- /dev/null
+++ b/sandbox/linux/syscall_broker/broker_file_permission.cc
@@ -0,0 +1,243 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/syscall_broker/broker_file_permission.h"
+
+#include <fcntl.h>
+#include <string.h>
+
+#include <string>
+
+#include "base/logging.h"
+#include "sandbox/linux/syscall_broker/broker_common.h"
+
+namespace sandbox {
+
+namespace syscall_broker {
+
+// Async signal safe
+bool BrokerFilePermission::ValidatePath(const char* path) {
+ if (!path)
+ return false;
+
+ const size_t len = strlen(path);
+ // No empty paths
+ if (len == 0)
+ return false;
+ // Paths must be absolute and not relative
+ if (path[0] != '/')
+ return false;
+ // No trailing / (but "/" is valid)
+ if (len > 1 && path[len - 1] == '/')
+ return false;
+ // No trailing /..
+ if (len >= 3 && path[len - 3] == '/' && path[len - 2] == '.' &&
+ path[len - 1] == '.')
+ return false;
+ // No /../ anywhere
+ for (size_t i = 0; i < len; i++) {
+ if (path[i] == '/' && (len - i) > 3) {
+ if (path[i + 1] == '.' && path[i + 2] == '.' && path[i + 3] == '/') {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+// Async signal safe
+// Calls std::string::c_str(), strncmp and strlen. All these
+// methods are async signal safe in common standard libs.
+// TODO(leecam): remove dependency on std::string
+bool BrokerFilePermission::MatchPath(const char* requested_filename) const {
+ const char* path = path_.c_str();
+ if ((recursive_ && strncmp(requested_filename, path, strlen(path)) == 0)) {
+ // Note: This prefix match will allow any path under the whitelisted
+ // path, for any number of directory levels. E.g. if the whitelisted
+ // path is /good/ then the following will be permitted by the policy.
+ // /good/file1
+ // /good/folder/file2
+ // /good/folder/folder2/file3
+ // If an attacker could make 'folder' a symlink to ../../ they would have
+ // access to the entire filesystem.
+ // Whitelisting with multiple depths is useful, e.g /proc/ but
+ // the system needs to ensure symlinks can not be created!
+ // That said if an attacker can convert any of the absolute paths
+ // to a symlink they can control any file on the system also.
+ return true;
+ } else if (strcmp(requested_filename, path) == 0) {
+ return true;
+ }
+ return false;
+}
+
+// Async signal safe.
+// External call to std::string::c_str() is
+// called in MatchPath.
+// TODO(leecam): remove dependency on std::string
+bool BrokerFilePermission::CheckAccess(const char* requested_filename,
+ int mode,
+ const char** file_to_access) const {
+ // First, check if |mode| is existence, ability to read or ability
+ // to write. We do not support X_OK.
+ if (mode != F_OK && mode & ~(R_OK | W_OK)) {
+ return false;
+ }
+
+ if (!ValidatePath(requested_filename))
+ return false;
+
+ if (!MatchPath(requested_filename)) {
+ return false;
+ }
+ bool allowed = false;
+ switch (mode) {
+ case F_OK:
+ if (allow_read_ || allow_write_)
+ allowed = true;
+ break;
+ case R_OK:
+ if (allow_read_)
+ allowed = true;
+ break;
+ case W_OK:
+ if (allow_write_)
+ allowed = true;
+ break;
+ case R_OK | W_OK:
+ if (allow_read_ && allow_write_)
+ allowed = true;
+ break;
+ default:
+ return false;
+ }
+
+ if (allowed && file_to_access) {
+ if (!recursive_)
+ *file_to_access = path_.c_str();
+ else
+ *file_to_access = requested_filename;
+ }
+ return allowed;
+}
+
+// Async signal safe.
+// External call to std::string::c_str() is
+// called in MatchPath.
+// TODO(leecam): remove dependency on std::string
+bool BrokerFilePermission::CheckOpen(const char* requested_filename,
+ int flags,
+ const char** file_to_open,
+ bool* unlink_after_open) const {
+ if (!ValidatePath(requested_filename))
+ return false;
+
+ if (!MatchPath(requested_filename)) {
+ return false;
+ }
+
+ // First, check the access mode is valid.
+ const int access_mode = flags & O_ACCMODE;
+ if (access_mode != O_RDONLY && access_mode != O_WRONLY &&
+ access_mode != O_RDWR) {
+ return false;
+ }
+
+ // Check if read is allowed
+ if (!allow_read_ && (access_mode == O_RDONLY || access_mode == O_RDWR)) {
+ return false;
+ }
+
+ // Check if write is allowed
+ if (!allow_write_ && (access_mode == O_WRONLY || access_mode == O_RDWR)) {
+ return false;
+ }
+
+ // Check if file creation is allowed.
+ if (!allow_create_ && (flags & O_CREAT)) {
+ return false;
+ }
+
+ // If O_CREAT is present, ensure O_EXCL
+ if ((flags & O_CREAT) && !(flags & O_EXCL)) {
+ return false;
+ }
+
+ // If this file is to be unlinked, ensure it's created.
+ if (unlink_ && !(flags & O_CREAT)) {
+ return false;
+ }
+
+ // Some flags affect the behavior of the current process. We don't support
+ // them and don't allow them for now.
+ if (flags & kCurrentProcessOpenFlagsMask) {
+ return false;
+ }
+
+ // Now check that all the flags are known to us.
+ const int creation_and_status_flags = flags & ~O_ACCMODE;
+
+ const int known_flags = O_APPEND | O_ASYNC | O_CLOEXEC | O_CREAT | O_DIRECT |
+ O_DIRECTORY | O_EXCL | O_LARGEFILE | O_NOATIME |
+ O_NOCTTY | O_NOFOLLOW | O_NONBLOCK | O_NDELAY |
+ O_SYNC | O_TRUNC;
+
+ const int unknown_flags = ~known_flags;
+ const bool has_unknown_flags = creation_and_status_flags & unknown_flags;
+
+ if (has_unknown_flags)
+ return false;
+
+ if (file_to_open) {
+ if (!recursive_)
+ *file_to_open = path_.c_str();
+ else
+ *file_to_open = requested_filename;
+ }
+ if (unlink_after_open)
+ *unlink_after_open = unlink_;
+
+ return true;
+}
+
+// Returns the message streamed by the constructor's CHECKs (for death tests).
+const char* BrokerFilePermission::GetErrorMessageForTests() {
+ return "Invalid BrokerFilePermission";
+}
+
+BrokerFilePermission::BrokerFilePermission(const std::string& path,
+ bool recursive,
+ bool unlink,
+ bool allow_read,
+ bool allow_write,
+ bool allow_create)
+ : path_(path),
+ recursive_(recursive),
+ unlink_(unlink),
+ allow_read_(allow_read),
+ allow_write_(allow_write),
+ allow_create_(allow_create) {
+ // Validate this permission and die if invalid!
+
+ // Must have enough length for a '/'
+ CHECK(path_.length() > 0) << GetErrorMessageForTests();
+ // Whitelisted paths must be absolute.
+ CHECK(path_[0] == '/') << GetErrorMessageForTests();
+
+ // Don't allow unlinking on creation without create permission
+ if (unlink_) {
+ CHECK(allow_create) << GetErrorMessageForTests();
+ }
+ const char last_char = *(path_.rbegin());
+ // Recursive paths must have a trailing slash
+ if (recursive_) {
+ CHECK(last_char == '/') << GetErrorMessageForTests();
+ } else {
+ CHECK(last_char != '/') << GetErrorMessageForTests();
+ }
+}
+
+} // namespace syscall_broker
+
+} // namespace sandbox
\ No newline at end of file
diff --git a/sandbox/linux/syscall_broker/broker_file_permission.h b/sandbox/linux/syscall_broker/broker_file_permission.h
new file mode 100644
index 0000000000..03300d1d74
--- /dev/null
+++ b/sandbox/linux/syscall_broker/broker_file_permission.h
@@ -0,0 +1,119 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SYSCALL_BROKER_BROKER_FILE_PERMISSION_H_
+#define SANDBOX_LINUX_SYSCALL_BROKER_BROKER_FILE_PERMISSION_H_
+
+#include <string>
+
+#include "base/macros.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+
+namespace syscall_broker {
+
+// BrokerFilePermission defines a path for whitelisting.
+// Pick the correct static factory method to create a permission.
+// CheckOpen and CheckAccess are async signal safe.
+// Construction and Destruction are not async signal safe.
+// |path| is the path to be whitelisted.
+class SANDBOX_EXPORT BrokerFilePermission {
+ public:
+ ~BrokerFilePermission() {}
+ BrokerFilePermission(const BrokerFilePermission&) = default;
+ BrokerFilePermission& operator=(const BrokerFilePermission&) = default;
+
+ static BrokerFilePermission ReadOnly(const std::string& path) {
+ return BrokerFilePermission(path, false, false, true, false, false);
+ }
+
+ static BrokerFilePermission ReadOnlyRecursive(const std::string& path) {
+ return BrokerFilePermission(path, true, false, true, false, false);
+ }
+
+ static BrokerFilePermission WriteOnly(const std::string& path) {
+ return BrokerFilePermission(path, false, false, false, true, false);
+ }
+
+ static BrokerFilePermission ReadWrite(const std::string& path) {
+ return BrokerFilePermission(path, false, false, true, true, false);
+ }
+
+ static BrokerFilePermission ReadWriteCreate(const std::string& path) {
+ return BrokerFilePermission(path, false, false, true, true, true);
+ }
+
+ static BrokerFilePermission ReadWriteCreateUnlink(const std::string& path) {
+ return BrokerFilePermission(path, false, true, true, true, true);
+ }
+
+ static BrokerFilePermission ReadWriteCreateUnlinkRecursive(
+ const std::string& path) {
+ return BrokerFilePermission(path, true, true, true, true, true);
+ }
+
+ // Returns true if |requested_filename| is allowed to be opened
+ // by this permission.
+ // If |file_to_open| is not NULL it is set to point to either
+ // the |requested_filename| in the case of a recursive match,
+ // or to the matched path in the whitelist if an absolute
+ // match.
+ // If not NULL, |*unlink_after_open| is set to whether the
+ // caller should unlink the path after opening.
+ // Async signal safe if |file_to_open| is NULL.
+ bool CheckOpen(const char* requested_filename,
+ int flags,
+ const char** file_to_open,
+ bool* unlink_after_open) const;
+ // Returns true if |requested_filename| is allowed to be accessed
+ // by this permission as per access(2).
+ // If |file_to_access| is not NULL it is set to point to either
+ // the |requested_filename| in the case of a recursive match,
+ // or to the matched path in the whitelist if an absolute
+ // match.
+ // |mode| is per mode argument of access(2).
+ // Async signal safe if |file_to_access| is NULL.
+ bool CheckAccess(const char* requested_filename,
+ int mode,
+ const char** file_to_access) const;
+
+ private:
+ friend class BrokerFilePermissionTester;
+ BrokerFilePermission(const std::string& path,
+ bool recursive,
+ bool unlink,
+ bool allow_read,
+ bool allow_write,
+ bool allow_create);
+
+ // ValidatePath checks |path| and returns true if these conditions are met
+ // * Greater than 0 length
+ // * Is an absolute path
+ // * No trailing slash (the root path "/" itself is accepted)
+ // * No /../ path traversal, and no trailing /..
+ static bool ValidatePath(const char* path);
+
+ // MatchPath returns true if |requested_filename| is covered by this instance
+ bool MatchPath(const char* requested_filename) const;
+
+ // Used by BrokerFilePermissionTester for tests.
+ static const char* GetErrorMessageForTests();
+
+ // These are not const as std::vector requires copy-assignment and this class
+ // is stored in vectors. All methods are marked const so
+ // the compiler will still enforce no changes outside of the constructor.
+ std::string path_;
+ bool recursive_; // Allow everything under this path. |path| must be a dir.
+ bool unlink_; // unlink after opening.
+ bool allow_read_;
+ bool allow_write_;
+ bool allow_create_;
+};
+
+} // namespace syscall_broker
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SYSCALL_BROKER_BROKER_FILE_PERMISSION_H_
\ No newline at end of file
diff --git a/sandbox/linux/syscall_broker/broker_file_permission_unittest.cc b/sandbox/linux/syscall_broker/broker_file_permission_unittest.cc
new file mode 100644
index 0000000000..b58a901cde
--- /dev/null
+++ b/sandbox/linux/syscall_broker/broker_file_permission_unittest.cc
@@ -0,0 +1,262 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/syscall_broker/broker_file_permission.h"
+
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "sandbox/linux/tests/test_utils.h"
+#include "sandbox/linux/tests/unit_tests.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace sandbox {
+
+namespace syscall_broker {
+
+class BrokerFilePermissionTester {
+ public:
+ static bool ValidatePath(const char* path) {
+ return BrokerFilePermission::ValidatePath(path);
+ }
+ static const char* GetErrorMessage() {
+ return BrokerFilePermission::GetErrorMessageForTests();
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(BrokerFilePermissionTester);
+};
+
+namespace {
+
+// Creation tests are DEATH tests as a bad permission causes termination.
+SANDBOX_TEST(BrokerFilePermission, CreateGood) {
+ const char kPath[] = "/tmp/good";
+ BrokerFilePermission perm = BrokerFilePermission::ReadOnly(kPath);
+}
+
+SANDBOX_TEST(BrokerFilePermission, CreateGoodRecursive) {
+ const char kPath[] = "/tmp/good/";
+ BrokerFilePermission perm = BrokerFilePermission::ReadOnlyRecursive(kPath);
+}
+
+SANDBOX_DEATH_TEST(
+ BrokerFilePermission,
+ CreateBad,
+ DEATH_MESSAGE(BrokerFilePermissionTester::GetErrorMessage())) {
+ const char kPath[] = "/tmp/bad/";
+ BrokerFilePermission perm = BrokerFilePermission::ReadOnly(kPath);
+}
+
+SANDBOX_DEATH_TEST(
+ BrokerFilePermission,
+ CreateBadRecursive,
+ DEATH_MESSAGE(BrokerFilePermissionTester::GetErrorMessage())) {
+ const char kPath[] = "/tmp/bad";
+ BrokerFilePermission perm = BrokerFilePermission::ReadOnlyRecursive(kPath);
+}
+
+SANDBOX_DEATH_TEST(
+ BrokerFilePermission,
+ CreateBadNotAbs,
+ DEATH_MESSAGE(BrokerFilePermissionTester::GetErrorMessage())) {
+ const char kPath[] = "tmp/bad";
+ BrokerFilePermission perm = BrokerFilePermission::ReadOnly(kPath);
+}
+
+SANDBOX_DEATH_TEST(
+ BrokerFilePermission,
+ CreateBadEmpty,
+ DEATH_MESSAGE(BrokerFilePermissionTester::GetErrorMessage())) {
+ const char kPath[] = "";
+ BrokerFilePermission perm = BrokerFilePermission::ReadOnly(kPath);
+}
+
+// CheckPerm tests |path| against |perm| given |access_flags|.
+// If |create| is true then file creation is tested for success.
+void CheckPerm(const BrokerFilePermission& perm,
+ const char* path,
+ int access_flags,
+ bool create) {
+ const char* file_to_open = NULL;
+
+ ASSERT_FALSE(perm.CheckAccess(path, X_OK, NULL));
+ ASSERT_TRUE(perm.CheckAccess(path, F_OK, NULL));
+ // check bad perms
+ switch (access_flags) {
+ case O_RDONLY:
+ ASSERT_TRUE(perm.CheckOpen(path, O_RDONLY, &file_to_open, NULL));
+ ASSERT_FALSE(perm.CheckOpen(path, O_WRONLY, &file_to_open, NULL));
+ ASSERT_FALSE(perm.CheckOpen(path, O_RDWR, &file_to_open, NULL));
+ ASSERT_TRUE(perm.CheckAccess(path, R_OK, NULL));
+ ASSERT_FALSE(perm.CheckAccess(path, W_OK, NULL));
+ break;
+ case O_WRONLY:
+ ASSERT_FALSE(perm.CheckOpen(path, O_RDONLY, &file_to_open, NULL));
+ ASSERT_TRUE(perm.CheckOpen(path, O_WRONLY, &file_to_open, NULL));
+ ASSERT_FALSE(perm.CheckOpen(path, O_RDWR, &file_to_open, NULL));
+ ASSERT_FALSE(perm.CheckAccess(path, R_OK, NULL));
+ ASSERT_TRUE(perm.CheckAccess(path, W_OK, NULL));
+ break;
+ case O_RDWR:
+ ASSERT_TRUE(perm.CheckOpen(path, O_RDONLY, &file_to_open, NULL));
+ ASSERT_TRUE(perm.CheckOpen(path, O_WRONLY, &file_to_open, NULL));
+ ASSERT_TRUE(perm.CheckOpen(path, O_RDWR, &file_to_open, NULL));
+ ASSERT_TRUE(perm.CheckAccess(path, R_OK, NULL));
+ ASSERT_TRUE(perm.CheckAccess(path, W_OK, NULL));
+ break;
+ default:
+ // Bad test case
+ NOTREACHED();
+ }
+
+// O_SYNC can be defined as (__O_SYNC|O_DSYNC)
+#ifdef O_DSYNC
+ const int kSyncFlag = O_SYNC & ~O_DSYNC;
+#else
+ const int kSyncFlag = O_SYNC;
+#endif
+
+ const int kNumberOfBitsInOAccMode = 2;
+ static_assert(O_ACCMODE == ((1 << kNumberOfBitsInOAccMode) - 1),
+ "incorrect number of bits");
+ // check every possible flag and act accordingly.
+ // Skipping AccMode bits as they are present in every case.
+ for (int i = kNumberOfBitsInOAccMode; i < 32; i++) {
+ int flag = 1 << i;
+ switch (flag) {
+ case O_APPEND:
+ case O_ASYNC:
+ case O_DIRECT:
+ case O_DIRECTORY:
+#ifdef O_DSYNC
+ case O_DSYNC:
+#endif
+ case O_EXCL:
+ case O_LARGEFILE:
+ case O_NOATIME:
+ case O_NOCTTY:
+ case O_NOFOLLOW:
+ case O_NONBLOCK:
+#if (O_NONBLOCK != O_NDELAY)
+ case O_NDELAY:
+#endif
+ case kSyncFlag:
+ case O_TRUNC:
+ ASSERT_TRUE(
+ perm.CheckOpen(path, access_flags | flag, &file_to_open, NULL));
+ break;
+ case O_CLOEXEC:
+ case O_CREAT:
+ default:
+ ASSERT_FALSE(
+ perm.CheckOpen(path, access_flags | flag, &file_to_open, NULL));
+ }
+ }
+ if (create) {
+ bool unlink;
+ ASSERT_TRUE(perm.CheckOpen(path, O_CREAT | O_EXCL | access_flags,
+ &file_to_open, &unlink));
+ ASSERT_FALSE(unlink);
+ } else {
+ ASSERT_FALSE(perm.CheckOpen(path, O_CREAT | O_EXCL | access_flags,
+ &file_to_open, NULL));
+ }
+}
+
+TEST(BrokerFilePermission, ReadOnly) {
+ const char kPath[] = "/tmp/good";
+ BrokerFilePermission perm = BrokerFilePermission::ReadOnly(kPath);
+ CheckPerm(perm, kPath, O_RDONLY, false);
+ // Don't do anything here, so that ASSERT works in the subfunction as
+ // expected.
+}
+
+TEST(BrokerFilePermission, ReadOnlyRecursive) {
+ const char kPath[] = "/tmp/good/";
+ const char kPathFile[] = "/tmp/good/file";
+ BrokerFilePermission perm = BrokerFilePermission::ReadOnlyRecursive(kPath);
+ CheckPerm(perm, kPathFile, O_RDONLY, false);
+ // Don't do anything here, so that ASSERT works in the subfunction as
+ // expected.
+}
+
+TEST(BrokerFilePermission, WriteOnly) {
+ const char kPath[] = "/tmp/good";
+ BrokerFilePermission perm = BrokerFilePermission::WriteOnly(kPath);
+ CheckPerm(perm, kPath, O_WRONLY, false);
+ // Don't do anything here, so that ASSERT works in the subfunction as
+ // expected.
+}
+
+TEST(BrokerFilePermission, ReadWrite) {
+ const char kPath[] = "/tmp/good";
+ BrokerFilePermission perm = BrokerFilePermission::ReadWrite(kPath);
+ CheckPerm(perm, kPath, O_RDWR, false);
+ // Don't do anything here, so that ASSERT works in the subfunction as
+ // expected.
+}
+
+TEST(BrokerFilePermission, ReadWriteCreate) {
+ const char kPath[] = "/tmp/good";
+ BrokerFilePermission perm = BrokerFilePermission::ReadWriteCreate(kPath);
+ CheckPerm(perm, kPath, O_RDWR, true);
+ // Don't do anything here, so that ASSERT works in the subfunction as
+ // expected.
+}
+
+void CheckUnlink(const BrokerFilePermission& perm,
+ const char* path,
+ int access_flags) {
+ bool unlink; // Written by CheckOpen() only when it succeeds.
+ ASSERT_FALSE(perm.CheckOpen(path, access_flags, NULL, &unlink));
+ ASSERT_FALSE(perm.CheckOpen(path, access_flags | O_CREAT, NULL, &unlink));
+ ASSERT_TRUE(
+ perm.CheckOpen(path, access_flags | O_CREAT | O_EXCL, NULL, &unlink));
+ ASSERT_TRUE(unlink);
+}
+
+TEST(BrokerFilePermission, ReadWriteCreateUnlink) {
+ const char kPath[] = "/tmp/good";
+ BrokerFilePermission perm =
+ BrokerFilePermission::ReadWriteCreateUnlink(kPath);
+ CheckUnlink(perm, kPath, O_RDWR);
+ // Don't do anything here, so that ASSERT works in the subfunction as
+ // expected.
+}
+
+TEST(BrokerFilePermission, ReadWriteCreateUnlinkRecursive) {
+ const char kPath[] = "/tmp/good/";
+ const char kPathFile[] = "/tmp/good/file";
+ BrokerFilePermission perm =
+ BrokerFilePermission::ReadWriteCreateUnlinkRecursive(kPath);
+ CheckUnlink(perm, kPathFile, O_RDWR);
+ // Don't do anything here, so that ASSERT works in the subfunction as
+ // expected.
+}
+
+TEST(BrokerFilePermission, ValidatePath) {
+ EXPECT_TRUE(BrokerFilePermissionTester::ValidatePath("/path"));
+ EXPECT_TRUE(BrokerFilePermissionTester::ValidatePath("/"));
+ EXPECT_TRUE(BrokerFilePermissionTester::ValidatePath("/..path"));
+
+ EXPECT_FALSE(BrokerFilePermissionTester::ValidatePath(""));
+ EXPECT_FALSE(BrokerFilePermissionTester::ValidatePath("bad"));
+ EXPECT_FALSE(BrokerFilePermissionTester::ValidatePath("/bad/"));
+ EXPECT_FALSE(BrokerFilePermissionTester::ValidatePath("bad/"));
+ EXPECT_FALSE(BrokerFilePermissionTester::ValidatePath("/bad/.."));
+ EXPECT_FALSE(BrokerFilePermissionTester::ValidatePath("/bad/../bad"));
+ EXPECT_FALSE(BrokerFilePermissionTester::ValidatePath("/../bad"));
+}
+
+} // namespace
+
+} // namespace syscall_broker
+
+} // namespace sandbox
diff --git a/sandbox/linux/syscall_broker/broker_host.cc b/sandbox/linux/syscall_broker/broker_host.cc
new file mode 100644
index 0000000000..830b98bf93
--- /dev/null
+++ b/sandbox/linux/syscall_broker/broker_host.cc
@@ -0,0 +1,231 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/syscall_broker/broker_host.h"
+
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <string>
+#include <vector>
+
+#include "base/files/scoped_file.h"
+#include "base/logging.h"
+#include "base/pickle.h"
+#include "base/posix/eintr_wrapper.h"
+#include "base/posix/unix_domain_socket_linux.h"
+#include "sandbox/linux/syscall_broker/broker_common.h"
+#include "sandbox/linux/syscall_broker/broker_policy.h"
+#include "sandbox/linux/system_headers/linux_syscalls.h"
+#include "third_party/valgrind/valgrind.h"
+
+namespace sandbox {
+
+namespace syscall_broker {
+
+namespace {
+
+bool IsRunningOnValgrind() {
+ return RUNNING_ON_VALGRIND;
+}
+
+// A little open(2) wrapper to handle some oddities for us. In the general case
+// make a direct system call since we want to keep in control of the broker
+// process' system calls profile to be able to loosely sandbox it.
+int sys_open(const char* pathname, int flags) {
+ // Hardcode mode to rw------- when creating files.
+ int mode;
+ if (flags & O_CREAT) {
+ mode = 0600;
+ } else {
+ mode = 0;
+ }
+ if (IsRunningOnValgrind()) {
+ // Valgrind does not support AT_FDCWD, just use libc's open() in this case.
+ return open(pathname, flags, mode);
+ } else {
+ return syscall(__NR_openat, AT_FDCWD, pathname, flags, mode);
+ }
+}
+
+// Open |requested_filename| with |flags| if allowed by our policy.
+// Write the syscall return value (-errno) to |write_pickle| and append
+// a file descriptor to |opened_files| if relevant.
+void OpenFileForIPC(const BrokerPolicy& policy,
+ const std::string& requested_filename,
+ int flags,
+ base::Pickle* write_pickle,
+ std::vector<int>* opened_files) {
+ DCHECK(write_pickle);
+ DCHECK(opened_files);
+ const char* file_to_open = NULL;
+ bool unlink_after_open = false;
+ const bool safe_to_open_file = policy.GetFileNameIfAllowedToOpen(
+ requested_filename.c_str(), flags, &file_to_open, &unlink_after_open);
+
+ if (safe_to_open_file) {
+ CHECK(file_to_open);
+ int opened_fd = sys_open(file_to_open, flags);
+ if (opened_fd < 0) {
+ write_pickle->WriteInt(-errno);
+ } else {
+ // Success.
+ if (unlink_after_open) {
+ unlink(file_to_open);
+ }
+ opened_files->push_back(opened_fd);
+ write_pickle->WriteInt(0);
+ }
+ } else {
+ write_pickle->WriteInt(-policy.denied_errno());
+ }
+}
+
+// Perform access(2) on |requested_filename| with mode |mode| if allowed by our
+// policy. Write the syscall return value (-errno) to |write_pickle|.
+void AccessFileForIPC(const BrokerPolicy& policy,
+ const std::string& requested_filename,
+ int mode,
+ base::Pickle* write_pickle) {
+ DCHECK(write_pickle);
+ const char* file_to_access = NULL;
+ const bool safe_to_access_file = policy.GetFileNameIfAllowedToAccess(
+ requested_filename.c_str(), mode, &file_to_access);
+
+ if (safe_to_access_file) {
+ CHECK(file_to_access);
+ int access_ret = access(file_to_access, mode);
+ int access_errno = errno;
+ if (!access_ret)
+ write_pickle->WriteInt(0);
+ else
+ write_pickle->WriteInt(-access_errno);
+ } else {
+ write_pickle->WriteInt(-policy.denied_errno());
+ }
+}
+
+// Handle a |command_type| request contained in |iter| and send the reply
+// on |reply_ipc|.
+// Currently COMMAND_OPEN and COMMAND_ACCESS are supported.
+bool HandleRemoteCommand(const BrokerPolicy& policy,
+ IPCCommand command_type,
+ int reply_ipc,
+ base::PickleIterator iter) {
+ // Currently all commands have two arguments: filename and flags.
+ std::string requested_filename;
+ int flags = 0;
+ if (!iter.ReadString(&requested_filename) || !iter.ReadInt(&flags))
+ return false;
+
+ base::Pickle write_pickle;
+ std::vector<int> opened_files;
+
+ switch (command_type) {
+ case COMMAND_ACCESS:
+ AccessFileForIPC(policy, requested_filename, flags, &write_pickle);
+ break;
+ case COMMAND_OPEN:
+ OpenFileForIPC(
+ policy, requested_filename, flags, &write_pickle, &opened_files);
+ break;
+ default:
+ LOG(ERROR) << "Invalid IPC command";
+ break;
+ }
+
+ CHECK_LE(write_pickle.size(), kMaxMessageLength);
+ ssize_t sent = base::UnixDomainSocket::SendMsg(
+ reply_ipc, write_pickle.data(), write_pickle.size(), opened_files);
+
+ // Close anything we have opened in this process.
+ for (std::vector<int>::iterator it = opened_files.begin();
+ it != opened_files.end();
+ ++it) {
+ int ret = IGNORE_EINTR(close(*it));
+ DCHECK(!ret) << "Could not close file descriptor";
+ }
+
+ if (sent <= 0) {
+ LOG(ERROR) << "Could not send IPC reply";
+ return false;
+ }
+ return true;
+}
+
+} // namespace
+
+BrokerHost::BrokerHost(const BrokerPolicy& broker_policy,
+ BrokerChannel::EndPoint ipc_channel)
+ : broker_policy_(broker_policy), ipc_channel_(ipc_channel.Pass()) {
+}
+
+BrokerHost::~BrokerHost() {
+}
+
+// Handle a request on the IPC channel ipc_channel_.
+// A request should have a file descriptor attached on which we will reply and
+// that we will then close.
+// A request should start with an int that will be used as the command type.
+BrokerHost::RequestStatus BrokerHost::HandleRequest() const {
+ ScopedVector<base::ScopedFD> fds;
+ char buf[kMaxMessageLength];
+ errno = 0;
+ const ssize_t msg_len = base::UnixDomainSocket::RecvMsg(
+ ipc_channel_.get(), buf, sizeof(buf), &fds);
+
+ if (msg_len == 0 || (msg_len == -1 && errno == ECONNRESET)) {
+ // EOF from the client, or the client died, we should die.
+ return RequestStatus::LOST_CLIENT;
+ }
+
+ // The client should send exactly one file descriptor, on which we
+ // will write the reply.
+ // TODO(mdempsky): ScopedVector doesn't have 'at()', only 'operator[]'.
+ if (msg_len < 0 || fds.size() != 1 || fds[0]->get() < 0) {
+ PLOG(ERROR) << "Error reading message from the client";
+ return RequestStatus::FAILURE;
+ }
+
+ base::ScopedFD temporary_ipc(fds[0]->Pass());
+
+ base::Pickle pickle(buf, msg_len);
+ base::PickleIterator iter(pickle);
+ int command_type;
+ if (iter.ReadInt(&command_type)) {
+ bool command_handled = false;
+ // Go through all the possible IPC messages.
+ switch (command_type) {
+ case COMMAND_ACCESS:
+ case COMMAND_OPEN:
+ // We reply on the file descriptor sent to us via the IPC channel.
+ command_handled = HandleRemoteCommand(
+ broker_policy_, static_cast<IPCCommand>(command_type),
+ temporary_ipc.get(), iter);
+ break;
+ default:
+ LOG(ERROR) << "Invalid IPC command";
+ break;
+ }
+
+ // |command_type| is read from the client's message, so an unknown value
+ // is a reachable input: it is logged above and reported as a failure.
+ if (command_handled) {
+ return RequestStatus::SUCCESS;
+ }
+
+ return RequestStatus::FAILURE;
+ }
+
+ LOG(ERROR) << "Error parsing IPC request";
+ return RequestStatus::FAILURE;
+}
+
+} // namespace syscall_broker
+
+} // namespace sandbox
diff --git a/sandbox/linux/syscall_broker/broker_host.h b/sandbox/linux/syscall_broker/broker_host.h
new file mode 100644
index 0000000000..9866507d1c
--- /dev/null
+++ b/sandbox/linux/syscall_broker/broker_host.h
@@ -0,0 +1,41 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SYSCALL_BROKER_BROKER_HOST_H_
+#define SANDBOX_LINUX_SYSCALL_BROKER_BROKER_HOST_H_
+
+#include "base/macros.h"
+#include "sandbox/linux/syscall_broker/broker_channel.h"
+
+namespace sandbox {
+
+namespace syscall_broker {
+
+class BrokerPolicy;
+
+// The BrokerHost class should be embedded in a (presumably not sandboxed)
+// process. It honors IPC requests from a BrokerClient sent over
+// |ipc_channel| according to |broker_policy|, one request per HandleRequest() call.
+class BrokerHost {
+ public:
+ enum class RequestStatus { LOST_CLIENT = 0, SUCCESS, FAILURE };  // Result of one HandleRequest() call.
+
+ BrokerHost(const BrokerPolicy& broker_policy,
+ BrokerChannel::EndPoint ipc_channel);
+ ~BrokerHost();
+
+ RequestStatus HandleRequest() const;  // Receives and handles one request from the IPC channel.
+
+ private:
+ const BrokerPolicy& broker_policy_;  // Held by reference, not copied.
+ const BrokerChannel::EndPoint ipc_channel_;  // Channel on which requests are received.
+
+ DISALLOW_COPY_AND_ASSIGN(BrokerHost);
+};
+
+} // namespace syscall_broker
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SYSCALL_BROKER_BROKER_HOST_H_
diff --git a/sandbox/linux/syscall_broker/broker_policy.cc b/sandbox/linux/syscall_broker/broker_policy.cc
new file mode 100644
index 0000000000..d9f69e3b81
--- /dev/null
+++ b/sandbox/linux/syscall_broker/broker_policy.cc
@@ -0,0 +1,99 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/syscall_broker/broker_policy.h"
+
+#include <fcntl.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <string>
+#include <vector>
+
+#include "base/logging.h"
+#include "sandbox/linux/syscall_broker/broker_common.h"
+
+namespace sandbox {
+namespace syscall_broker {
+
+// Builds a policy that reports |denied_errno| for denied requests and keeps
+// its own copy of |permissions|.
+BrokerPolicy::BrokerPolicy(int denied_errno,
+                           const std::vector<BrokerFilePermission>& permissions)
+    : denied_errno_(denied_errno),
+      permissions_(permissions),
+      num_of_permissions_(permissions.size()) {
+  // Async signal safe code must not go through std::vector, so cache a raw
+  // pointer to the vector's contiguous backing store. With no permissions
+  // there is no element to point at, so use NULL instead.
+  permissions_array_ = (num_of_permissions_ == 0) ? NULL : &permissions_[0];
+}
+
+// Nothing to release explicitly; the members clean up after themselves.
+BrokerPolicy::~BrokerPolicy() {}
+
+// Decide whether access() may be called on |requested_filename| with mode
+// |requested_mode|.
+// Note: access() is itself a permission-checking system call, which makes
+// this a little confusing. What is decided here is whether the call should be
+// permitted at all, using the same policy we would use for open().
+// If |file_to_access| is not NULL, we will return the matching pointer from
+// the whitelist. For paranoia a caller should then use |file_to_access|. See
+// GetFileNameIfAllowedToOpen() for more explanation.
+// Returns true if calling access() on this file should be allowed, false
+// otherwise.
+// Async signal safe if and only if |file_to_access| is NULL.
+bool BrokerPolicy::GetFileNameIfAllowedToAccess(
+    const char* requested_filename,
+    int requested_mode,
+    const char** file_to_access) const {
+  // The out-parameter must arrive empty. A caller that forgets to check the
+  // return value and reads the string instead would otherwise go unnoticed.
+  const bool out_param_already_set =
+      file_to_access != NULL && *file_to_access != NULL;
+  if (out_param_already_set) {
+    RAW_LOG(FATAL, "*file_to_access should be NULL");
+    return false;
+  }
+
+  // Stop at the first permission, in whitelist order, that accepts the
+  // request.
+  size_t index = 0;
+  while (index < num_of_permissions_) {
+    const BrokerFilePermission& permission = permissions_array_[index];
+    if (permission.CheckAccess(requested_filename, requested_mode,
+                               file_to_access)) {
+      return true;
+    }
+    ++index;
+  }
+  return false;
+}
+
+// Decide whether |requested_filename| may be opened with flags
+// |requested_flags|.
+// If |file_to_open| is not NULL, we will return the matching pointer from the
+// whitelist. For paranoia, a caller should then use |file_to_open| rather
+// than |requested_filename|, so that it never attempts to open an
+// attacker-controlled file name, even if an attacker managed to fool the
+// string comparison mechanism.
+// |unlink_after_open| is passed through to each permission's CheckOpen().
+// Returns true if opening should be allowed, false otherwise.
+// Async signal safe if and only if |file_to_open| is NULL.
+bool BrokerPolicy::GetFileNameIfAllowedToOpen(const char* requested_filename,
+                                              int requested_flags,
+                                              const char** file_to_open,
+                                              bool* unlink_after_open) const {
+  // The out-parameter must arrive empty. A caller that forgets to check the
+  // return value and reads the string instead would otherwise go unnoticed.
+  const bool out_param_already_set =
+      file_to_open != NULL && *file_to_open != NULL;
+  if (out_param_already_set) {
+    RAW_LOG(FATAL, "*file_to_open should be NULL");
+    return false;
+  }
+
+  // Stop at the first permission, in whitelist order, that accepts the
+  // request.
+  size_t index = 0;
+  while (index < num_of_permissions_) {
+    const BrokerFilePermission& permission = permissions_array_[index];
+    if (permission.CheckOpen(requested_filename, requested_flags, file_to_open,
+                             unlink_after_open)) {
+      return true;
+    }
+    ++index;
+  }
+  return false;
+}
+
+} // namespace syscall_broker
+
+} // namespace sandbox
diff --git a/sandbox/linux/syscall_broker/broker_policy.h b/sandbox/linux/syscall_broker/broker_policy.h
new file mode 100644
index 0000000000..d5146edc06
--- /dev/null
+++ b/sandbox/linux/syscall_broker/broker_policy.h
@@ -0,0 +1,87 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SYSCALL_BROKER_BROKER_POLICY_H_
+#define SANDBOX_LINUX_SYSCALL_BROKER_BROKER_POLICY_H_
+
+#include <string>
+#include <vector>
+
+#include "base/macros.h"
+
+#include "sandbox/linux/syscall_broker/broker_file_permission.h"
+
+namespace sandbox {
+namespace syscall_broker {
+
+// BrokerPolicy defines the security policy enforced by a
+// BrokerHost. The BrokerHost will evaluate requests sent over its
+// IPC channel according to the BrokerPolicy.
+// Some of the methods of this class can be used in an async-signal safe
+// way.
+class BrokerPolicy {
+ public:
+ // |denied_errno| is the error code returned when IPC requests for system
+ // calls such as open() or access() are denied because a file is not in the
+ // whitelist. EACCES would be a typical value.
+ // |permissions| is a list of BrokerFilePermission objects that define
+ // what the broker will allow.
+ BrokerPolicy(int denied_errno,
+ const std::vector<BrokerFilePermission>& permissions);
+
+ ~BrokerPolicy();
+
+ // Check if calling access() should be allowed on |requested_filename| with
+ // mode |requested_mode|.
+ // Note: access() being a system call to check permissions, this can get a bit
+ // confusing. We're checking if calling access() should even be allowed with
+ // the same policy we would use for open(). If |file_to_access| is not NULL,
+ // a pointer to the path will be returned. In the case of a recursive match,
+ // this will be the requested_filename, otherwise it will return the matching
+ // pointer from the whitelist. For paranoia a caller should then use
+ // |file_to_access|. See GetFileNameIfAllowedToOpen() for more explanation.
+ // Returns true if calling access() on this file should be allowed, false
+ // otherwise.
+ // Async signal safe if and only if |file_to_access| is NULL.
+ bool GetFileNameIfAllowedToAccess(const char* requested_filename,
+ int requested_mode,
+ const char** file_to_access) const;
+
+ // Check if |requested_filename| can be opened with flags |requested_flags|.
+ // If |file_to_open| is not NULL, a pointer to the path will be returned.
+ // In the case of a recursive match, this will be the requested_filename,
+ // otherwise it will return the matching pointer from the
+ // whitelist. For paranoia, a caller should then use |file_to_open| rather
+ // than |requested_filename|, so that it never attempts to open an
+ // attacker-controlled file name, even if an attacker managed to fool the
+ // string comparison mechanism.
+ // |unlink_after_open|, if not NULL, will be set to true if the
+ // policy requests that the caller unlink the path after opening.
+ // Returns true if opening should be allowed, false otherwise.
+ // Async signal safe if and only if |file_to_open| is NULL.
+ bool GetFileNameIfAllowedToOpen(const char* requested_filename,
+ int requested_flags,
+ const char** file_to_open,
+ bool* unlink_after_open) const;
+ int denied_errno() const { return denied_errno_; }  // Error code used for denied requests.
+
+ private:
+ const int denied_errno_;
+ // The permissions_ vector is used as storage for the BrokerFilePermission
+ // objects but is not referenced outside of the constructor as
+ // vectors are unfriendly in async signal safe code.
+ const std::vector<BrokerFilePermission> permissions_;
+ // permissions_array_ is set up to point to the backing store of
+ // permissions_ (NULL when it is empty) and is used in async signal safe methods.
+ const BrokerFilePermission* permissions_array_;
+ const size_t num_of_permissions_;  // Number of entries reachable through permissions_array_.
+
+ DISALLOW_COPY_AND_ASSIGN(BrokerPolicy);
+};
+
+} // namespace syscall_broker
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SYSCALL_BROKER_BROKER_POLICY_H_
diff --git a/sandbox/linux/syscall_broker/broker_process.cc b/sandbox/linux/syscall_broker/broker_process.cc
new file mode 100644
index 0000000000..81131cc4e0
--- /dev/null
+++ b/sandbox/linux/syscall_broker/broker_process.cc
@@ -0,0 +1,120 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/syscall_broker/broker_process.h"
+
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "base/callback.h"
+#include "base/logging.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/posix/eintr_wrapper.h"
+#include "base/process/process_metrics.h"
+#include "build/build_config.h"
+#include "sandbox/linux/syscall_broker/broker_channel.h"
+#include "sandbox/linux/syscall_broker/broker_client.h"
+#include "sandbox/linux/syscall_broker/broker_host.h"
+
+namespace sandbox {
+
+namespace syscall_broker {
+
+BrokerProcess::BrokerProcess(
+ int denied_errno,
+ const std::vector<syscall_broker::BrokerFilePermission>& permissions,
+ bool fast_check_in_client,
+ bool quiet_failures_for_tests)
+ : initialized_(false),  // Becomes true once Init() has forked the broker.
+ fast_check_in_client_(fast_check_in_client),
+ quiet_failures_for_tests_(quiet_failures_for_tests),
+ broker_pid_(-1),  // Placeholder until Init() records the child's PID.
+ policy_(denied_errno, permissions) {  // No process is forked here; see Init().
+}
+
+// Tears down the broker child, if Init() ever created one: closes the
+// channel, kills the child and reaps it.
+BrokerProcess::~BrokerProcess() {
+  if (!initialized_)
+    return;
+
+  if (broker_client_.get()) {
+    // Closing the socket should be enough to notify the child to die,
+    // unless it has been duplicated.
+    CloseChannel();
+  }
+  // Kill unconditionally rather than rely on the child noticing the close.
+  PCHECK(0 == kill(broker_pid_, SIGKILL));
+
+  // Reap the child.
+  siginfo_t child_info;
+  const int wait_result =
+      HANDLE_EINTR(waitid(P_PID, broker_pid_, &child_info, WEXITED));
+  PCHECK(0 == wait_result);
+}
+
+// Forks the broker process. In the parent this returns true once the
+// BrokerClient is set up, or false if fork() failed. The child runs
+// |broker_process_init_callback|, then serves requests until the client goes
+// away, and never returns from this function.
+bool BrokerProcess::Init(
+    const base::Callback<bool(void)>& broker_process_init_callback) {
+  CHECK(!initialized_);
+  BrokerChannel::EndPoint ipc_reader;
+  BrokerChannel::EndPoint ipc_writer;
+  BrokerChannel::CreatePair(&ipc_reader, &ipc_writer);
+
+#if !defined(THREAD_SANITIZER)
+  DCHECK_EQ(1, base::GetNumberOfThreads(base::GetCurrentProcessHandle()));
+#endif
+  const int fork_result = fork();
+  if (fork_result == -1)
+    return false;
+
+  if (fork_result != 0) {
+    // We are the parent and we have just forked our broker process.
+    ipc_reader.reset();
+    broker_pid_ = fork_result;
+    broker_client_.reset(new BrokerClient(policy_, ipc_writer.Pass(),
+                                          fast_check_in_client_,
+                                          quiet_failures_for_tests_));
+    initialized_ = true;
+    return true;
+  }
+
+  // We are the broker process. Make sure to close the writer's end so that
+  // we get notified if the client disappears.
+  ipc_writer.reset();
+  CHECK(broker_process_init_callback.Run());
+  BrokerHost broker_host(policy_, ipc_reader.Pass());
+  for (;;) {
+    // SUCCESS and FAILURE both just move on to the next request; only a
+    // lost client ends the broker.
+    if (broker_host.HandleRequest() ==
+        BrokerHost::RequestStatus::LOST_CLIENT) {
+      _exit(1);
+    }
+  }
+  NOTREACHED();
+  return false;
+}
+
+void BrokerProcess::CloseChannel() {
+ broker_client_.reset();  // Drops the BrokerClient that Init() created in the parent.
+}
+
+int BrokerProcess::Access(const char* pathname, int mode) const {
+ RAW_CHECK(initialized_);  // Init() must have succeeded before any request is made.
+ return broker_client_->Access(pathname, mode);  // Forwarded unchanged to the BrokerClient.
+}
+
+int BrokerProcess::Open(const char* pathname, int flags) const {
+ RAW_CHECK(initialized_);  // Init() must have succeeded before any request is made.
+ return broker_client_->Open(pathname, flags);  // Forwarded unchanged to the BrokerClient.
+}
+
+} // namespace syscall_broker
+
+} // namespace sandbox
diff --git a/sandbox/linux/syscall_broker/broker_process.h b/sandbox/linux/syscall_broker/broker_process.h
new file mode 100644
index 0000000000..8a512a0c12
--- /dev/null
+++ b/sandbox/linux/syscall_broker/broker_process.h
@@ -0,0 +1,94 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef SANDBOX_LINUX_SERVICES_BROKER_PROCESS_H_
+#define SANDBOX_LINUX_SERVICES_BROKER_PROCESS_H_
+
+#include <string>
+#include <vector>
+
+#include "base/callback_forward.h"
+#include "base/macros.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/pickle.h"
+#include "base/process/process.h"
+#include "sandbox/linux/syscall_broker/broker_policy.h"
+#include "sandbox/sandbox_export.h"
+
+namespace sandbox {
+
+namespace syscall_broker {
+
+class BrokerClient;
+class BrokerFilePermission;
+
+// Create a new "broker" process to which we can send requests via an IPC
+// channel by forking the current process.
+// This is a low level IPC mechanism that is suitable to be called from a
+// signal handler.
+// A process would typically create a broker process before entering
+// sandboxing.
+// 1. BrokerProcess open_broker(denied_errno, permissions);
+// 2. CHECK(open_broker.Init(broker_process_init_callback));
+// 3. Enable sandbox.
+// 4. Use open_broker.Open() to open files.
+class SANDBOX_EXPORT BrokerProcess {
+ public:
+ // |denied_errno| is the error code returned when methods such as Open()
+ // or Access() are invoked on a file which is not in the whitelist. EACCES
+ // would be a typical value.
+ // |permissions| is the list of BrokerFilePermission objects that define
+ // what the broker will allow; it is used, together with |denied_errno|, to
+ // build the BrokerPolicy held by this object.
+ // |fast_check_in_client| and |quiet_failures_for_tests| are reserved for
+ // unit tests, don't use them.
+
+ BrokerProcess(
+ int denied_errno,
+ const std::vector<syscall_broker::BrokerFilePermission>& permissions,
+ bool fast_check_in_client = true,
+ bool quiet_failures_for_tests = false);
+
+ ~BrokerProcess();
+ // Will initialize the broker process. There should be no threads at this
+ // point, since we need to fork().
+ // broker_process_init_callback will be called in the new broker process,
+ // after fork() returns.
+ bool Init(const base::Callback<bool(void)>& broker_process_init_callback);
+
+ // Can be used in place of access(). Will be async signal safe.
+ // X_OK will always return an error in practice since the broker process
+ // doesn't support execute permissions.
+ // It's similar to the access() system call and will return -errno on errors.
+ int Access(const char* pathname, int mode) const;
+ // Can be used in place of open(). Will be async signal safe.
+ // The implementation only supports certain white listed flags and will
+ // return -EPERM on other flags (NOTE(review): the unit tests expect -|denied_errno| for O_CREAT; confirm).
+ // It's similar to the open() system call and will return -errno on errors.
+ int Open(const char* pathname, int flags) const;
+
+ int broker_pid() const { return broker_pid_; }
+
+ private:
+ friend class BrokerProcessTestHelper;
+
+ // Close the IPC channel with the other party. This should only be used
+ // by tests and none of the class methods should be used afterwards.
+ void CloseChannel();
+
+ bool initialized_; // Whether we've been through Init() yet.
+ const bool fast_check_in_client_;
+ const bool quiet_failures_for_tests_;
+ pid_t broker_pid_; // The PID of the broker (child).
+ syscall_broker::BrokerPolicy policy_; // The sandboxing policy.
+ scoped_ptr<syscall_broker::BrokerClient> broker_client_;
+
+ DISALLOW_COPY_AND_ASSIGN(BrokerProcess);
+};
+
+} // namespace syscall_broker
+
+} // namespace sandbox
+
+#endif // SANDBOX_LINUX_SERVICES_BROKER_PROCESS_H_
diff --git a/sandbox/linux/syscall_broker/broker_process_unittest.cc b/sandbox/linux/syscall_broker/broker_process_unittest.cc
new file mode 100644
index 0000000000..9ad0e719de
--- /dev/null
+++ b/sandbox/linux/syscall_broker/broker_process_unittest.cc
@@ -0,0 +1,656 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "sandbox/linux/syscall_broker/broker_process.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "base/bind.h"
+#include "base/files/file_util.h"
+#include "base/files/scoped_file.h"
+#include "base/logging.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/posix/eintr_wrapper.h"
+#include "base/posix/unix_domain_socket_linux.h"
+#include "sandbox/linux/syscall_broker/broker_client.h"
+#include "sandbox/linux/tests/scoped_temporary_file.h"
+#include "sandbox/linux/tests/test_utils.h"
+#include "sandbox/linux/tests/unit_tests.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace sandbox {
+
+namespace syscall_broker {
+
+class BrokerProcessTestHelper {
+ public:
+ static void CloseChannel(BrokerProcess* broker) { broker->CloseChannel(); }  // Exposes the private CloseChannel().
+ // Get the client's IPC descriptor to send IPC requests directly.
+ // TODO(jln): refactor tests to get rid of this.
+ static int GetIPCDescriptor(const BrokerProcess* broker) {
+ return broker->broker_client_->GetIPCDescriptor();
+ }
+};
+
+namespace {
+
+bool NoOpCallback() {  // Broker init callback that does nothing and reports success.
+ return true;
+}
+
+} // namespace
+
+TEST(BrokerProcess, CreateAndDestroy) {  // A child exists after Init() and is gone after destruction.
+ std::vector<BrokerFilePermission> permissions;
+ permissions.push_back(BrokerFilePermission::ReadOnly("/proc/cpuinfo"));
+
+ scoped_ptr<BrokerProcess> open_broker(new BrokerProcess(EPERM, permissions));
+ ASSERT_TRUE(open_broker->Init(base::Bind(&NoOpCallback)));
+
+ ASSERT_TRUE(TestUtils::CurrentProcessHasChildren());
+ // Destroy the broker and check that no child process is left behind.
+ open_broker.reset();
+ ASSERT_FALSE(TestUtils::CurrentProcessHasChildren());
+}
+
+TEST(BrokerProcess, TestOpenAccessNull) {  // NULL path names must be reported as -EFAULT.
+ std::vector<BrokerFilePermission> empty;
+ BrokerProcess open_broker(EPERM, empty);
+ ASSERT_TRUE(open_broker.Init(base::Bind(&NoOpCallback)));
+
+ int fd = open_broker.Open(NULL, O_RDONLY);
+ ASSERT_EQ(fd, -EFAULT);
+
+ int ret = open_broker.Access(NULL, F_OK);
+ ASSERT_EQ(ret, -EFAULT);
+}
+
+void TestOpenFilePerms(bool fast_check_in_client, int denied_errno) {  // Checks every Open()/Access() mode against each kind of whitelist entry.
+ const char kR_WhiteListed[] = "/proc/DOESNOTEXIST1";
+ // We can't debug the init process, and shouldn't be able to access
+ // its auxv file.
+ const char kR_WhiteListedButDenied[] = "/proc/1/auxv";
+ const char kW_WhiteListed[] = "/proc/DOESNOTEXIST2";
+ const char kRW_WhiteListed[] = "/proc/DOESNOTEXIST3";
+ const char k_NotWhitelisted[] = "/proc/DOESNOTEXIST4";
+
+ std::vector<BrokerFilePermission> permissions;
+ permissions.push_back(BrokerFilePermission::ReadOnly(kR_WhiteListed));
+ permissions.push_back(
+ BrokerFilePermission::ReadOnly(kR_WhiteListedButDenied));
+ permissions.push_back(BrokerFilePermission::WriteOnly(kW_WhiteListed));
+ permissions.push_back(BrokerFilePermission::ReadWrite(kRW_WhiteListed));
+
+ BrokerProcess open_broker(denied_errno, permissions, fast_check_in_client);
+ ASSERT_TRUE(open_broker.Init(base::Bind(&NoOpCallback)));
+
+ int fd = -1;  // Read-only entry: permitted requests expect -ENOENT, the rest -denied_errno.
+ fd = open_broker.Open(kR_WhiteListed, O_RDONLY);
+ ASSERT_EQ(fd, -ENOENT);
+ fd = open_broker.Open(kR_WhiteListed, O_WRONLY);
+ ASSERT_EQ(fd, -denied_errno);
+ fd = open_broker.Open(kR_WhiteListed, O_RDWR);
+ ASSERT_EQ(fd, -denied_errno);
+ int ret = -1;
+ ret = open_broker.Access(kR_WhiteListed, F_OK);
+ ASSERT_EQ(ret, -ENOENT);
+ ret = open_broker.Access(kR_WhiteListed, R_OK);
+ ASSERT_EQ(ret, -ENOENT);
+ ret = open_broker.Access(kR_WhiteListed, W_OK);
+ ASSERT_EQ(ret, -denied_errno);
+ ret = open_broker.Access(kR_WhiteListed, R_OK | W_OK);
+ ASSERT_EQ(ret, -denied_errno);
+ ret = open_broker.Access(kR_WhiteListed, X_OK);
+ ASSERT_EQ(ret, -denied_errno);
+ ret = open_broker.Access(kR_WhiteListed, R_OK | X_OK);
+ ASSERT_EQ(ret, -denied_errno);
+
+ // Android sometimes runs tests as root.
+ // This part of the test requires a process that doesn't have
+ // CAP_DAC_OVERRIDE. We check against a root euid as a proxy for that.
+ if (geteuid()) {
+ fd = open_broker.Open(kR_WhiteListedButDenied, O_RDONLY);
+ // The broker process will allow this, but the normal permission system
+ // won't.
+ ASSERT_EQ(fd, -EACCES);
+ fd = open_broker.Open(kR_WhiteListedButDenied, O_WRONLY);
+ ASSERT_EQ(fd, -denied_errno);
+ fd = open_broker.Open(kR_WhiteListedButDenied, O_RDWR);
+ ASSERT_EQ(fd, -denied_errno);
+ ret = open_broker.Access(kR_WhiteListedButDenied, F_OK);
+ // The normal permission system will let us check that the file exists.
+ ASSERT_EQ(ret, 0);
+ ret = open_broker.Access(kR_WhiteListedButDenied, R_OK);
+ ASSERT_EQ(ret, -EACCES);
+ ret = open_broker.Access(kR_WhiteListedButDenied, W_OK);
+ ASSERT_EQ(ret, -denied_errno);
+ ret = open_broker.Access(kR_WhiteListedButDenied, R_OK | W_OK);
+ ASSERT_EQ(ret, -denied_errno);
+ ret = open_broker.Access(kR_WhiteListedButDenied, X_OK);
+ ASSERT_EQ(ret, -denied_errno);
+ ret = open_broker.Access(kR_WhiteListedButDenied, R_OK | X_OK);
+ ASSERT_EQ(ret, -denied_errno);
+ }
+
+ fd = open_broker.Open(kW_WhiteListed, O_RDONLY);  // Write-only entry: only O_WRONLY, F_OK and W_OK are permitted.
+ ASSERT_EQ(fd, -denied_errno);
+ fd = open_broker.Open(kW_WhiteListed, O_WRONLY);
+ ASSERT_EQ(fd, -ENOENT);
+ fd = open_broker.Open(kW_WhiteListed, O_RDWR);
+ ASSERT_EQ(fd, -denied_errno);
+ ret = open_broker.Access(kW_WhiteListed, F_OK);
+ ASSERT_EQ(ret, -ENOENT);
+ ret = open_broker.Access(kW_WhiteListed, R_OK);
+ ASSERT_EQ(ret, -denied_errno);
+ ret = open_broker.Access(kW_WhiteListed, W_OK);
+ ASSERT_EQ(ret, -ENOENT);
+ ret = open_broker.Access(kW_WhiteListed, R_OK | W_OK);
+ ASSERT_EQ(ret, -denied_errno);
+ ret = open_broker.Access(kW_WhiteListed, X_OK);
+ ASSERT_EQ(ret, -denied_errno);
+ ret = open_broker.Access(kW_WhiteListed, R_OK | X_OK);
+ ASSERT_EQ(ret, -denied_errno);
+
+ fd = open_broker.Open(kRW_WhiteListed, O_RDONLY);  // Read-write entry: everything except X_OK is permitted.
+ ASSERT_EQ(fd, -ENOENT);
+ fd = open_broker.Open(kRW_WhiteListed, O_WRONLY);
+ ASSERT_EQ(fd, -ENOENT);
+ fd = open_broker.Open(kRW_WhiteListed, O_RDWR);
+ ASSERT_EQ(fd, -ENOENT);
+ ret = open_broker.Access(kRW_WhiteListed, F_OK);
+ ASSERT_EQ(ret, -ENOENT);
+ ret = open_broker.Access(kRW_WhiteListed, R_OK);
+ ASSERT_EQ(ret, -ENOENT);
+ ret = open_broker.Access(kRW_WhiteListed, W_OK);
+ ASSERT_EQ(ret, -ENOENT);
+ ret = open_broker.Access(kRW_WhiteListed, R_OK | W_OK);
+ ASSERT_EQ(ret, -ENOENT);
+ ret = open_broker.Access(kRW_WhiteListed, X_OK);
+ ASSERT_EQ(ret, -denied_errno);
+ ret = open_broker.Access(kRW_WhiteListed, R_OK | X_OK);
+ ASSERT_EQ(ret, -denied_errno);
+
+ fd = open_broker.Open(k_NotWhitelisted, O_RDONLY);  // Path with no whitelist entry: every request is denied.
+ ASSERT_EQ(fd, -denied_errno);
+ fd = open_broker.Open(k_NotWhitelisted, O_WRONLY);
+ ASSERT_EQ(fd, -denied_errno);
+ fd = open_broker.Open(k_NotWhitelisted, O_RDWR);
+ ASSERT_EQ(fd, -denied_errno);
+ ret = open_broker.Access(k_NotWhitelisted, F_OK);
+ ASSERT_EQ(ret, -denied_errno);
+ ret = open_broker.Access(k_NotWhitelisted, R_OK);
+ ASSERT_EQ(ret, -denied_errno);
+ ret = open_broker.Access(k_NotWhitelisted, W_OK);
+ ASSERT_EQ(ret, -denied_errno);
+ ret = open_broker.Access(k_NotWhitelisted, R_OK | W_OK);
+ ASSERT_EQ(ret, -denied_errno);
+ ret = open_broker.Access(k_NotWhitelisted, X_OK);
+ ASSERT_EQ(ret, -denied_errno);
+ ret = open_broker.Access(k_NotWhitelisted, R_OK | X_OK);
+ ASSERT_EQ(ret, -denied_errno);
+
+ // We have some extra sanity checks for clearly wrong values.
+ fd = open_broker.Open(kRW_WhiteListed, O_RDONLY | O_WRONLY | O_RDWR);
+ ASSERT_EQ(fd, -denied_errno);
+
+ // It makes no sense to allow O_CREAT in a 2-parameter open. Ensure this
+ // is denied.
+ fd = open_broker.Open(kRW_WhiteListed, O_RDWR | O_CREAT);
+ ASSERT_EQ(fd, -denied_errno);
+}
+
+// Run the same thing twice: here with the fast check in the client enabled,
+// and in the next test with it disabled.
+TEST(BrokerProcess, OpenFilePermsWithClientCheck) {
+ TestOpenFilePerms(true /* fast_check_in_client */, EPERM);
+ // Keep this body to the single call: a fatal ASSERT_* only returns from the
+ // subfunction, so anything added after the call would still run.
+}
+
+TEST(BrokerProcess, OpenOpenFilePermsNoClientCheck) {
+ TestOpenFilePerms(false /* fast_check_in_client */, EPERM);
+ // Keep this body to the single call: a fatal ASSERT_* only returns from the
+ // subfunction, so anything added after the call would still run.
+}
+
+// Run the same two variants again, but with ENOENT instead of EPERM as the denied errno.
+TEST(BrokerProcess, OpenFilePermsWithClientCheckNoEnt) {
+ TestOpenFilePerms(true /* fast_check_in_client */, ENOENT);
+ // Keep this body to the single call: a fatal ASSERT_* only returns from the
+ // subfunction, so anything added after the call would still run.
+}
+
+TEST(BrokerProcess, OpenOpenFilePermsNoClientCheckNoEnt) {
+ TestOpenFilePerms(false /* fast_check_in_client */, ENOENT);
+ // Keep this body to the single call: a fatal ASSERT_* only returns from the
+ // subfunction, so anything added after the call would still run.
+}
+
+// With "/proc/" whitelisted recursively, checks that a well-formed path can
+// be opened while malformed variants are refused with -EPERM by both
+// Access() and Open().
+void TestBadPaths(bool fast_check_in_client) {
+  const char kFileCpuInfo[] = "/proc/cpuinfo";
+  // Every entry below must be rejected.
+  const char* const kRejectedPaths[] = {
+      "proc/cpuinfo",           // Not an absolute path.
+      "/../proc/cpuinfo",       // ".." at the start.
+      "/proc/self/../cpuinfo",  // ".." in the middle.
+      "/proc/..",               // ".." at the end.
+      "/proc/",                 // Trailing slash.
+  };
+  const size_t kNumRejectedPaths =
+      sizeof(kRejectedPaths) / sizeof(kRejectedPaths[0]);
+
+  std::vector<BrokerFilePermission> permissions;
+  permissions.push_back(BrokerFilePermission::ReadOnlyRecursive("/proc/"));
+
+  scoped_ptr<BrokerProcess> open_broker(
+      new BrokerProcess(EPERM, permissions, fast_check_in_client));
+  ASSERT_TRUE(open_broker->Init(base::Bind(&NoOpCallback)));
+
+  // Open cpuinfo via the broker.
+  int cpuinfo_fd = open_broker->Open(kFileCpuInfo, O_RDONLY);
+  base::ScopedFD cpuinfo_fd_closer(cpuinfo_fd);
+  ASSERT_GE(cpuinfo_fd, 0);
+
+  // Same order as the paths are listed: Access() first, then Open().
+  for (size_t i = 0; i < kNumRejectedPaths; ++i) {
+    const char* const bad_path = kRejectedPaths[i];
+
+    const int can_access = open_broker->Access(bad_path, R_OK);
+    ASSERT_EQ(can_access, -EPERM);
+
+    const int fd = open_broker->Open(bad_path, O_RDONLY);
+    ASSERT_EQ(fd, -EPERM);
+  }
+}
+
+TEST(BrokerProcess, BadPathsClientCheck) {
+ TestBadPaths(true /* fast_check_in_client */);
+ // Keep this body to the single call: a fatal ASSERT_* only returns from the
+ // subfunction, so anything added after the call would still run.
+}
+
+TEST(BrokerProcess, BadPathsNoClientCheck) {
+ TestBadPaths(false /* fast_check_in_client */);
+ // Keep this body to the single call: a fatal ASSERT_* only returns from the
+ // subfunction, so anything added after the call would still run.
+}
+
+// Opens /proc/cpuinfo through the broker and directly, and checks that both
+// descriptors yield the same leading bytes. The file is whitelisted read-only,
+// either by exact name or, when |recursive| is true, through a recursive
+// permission on "/proc/". Finally checks that destroying the broker leaves no
+// child process behind.
+void TestOpenCpuinfo(bool fast_check_in_client, bool recursive) {
+  const char kFileCpuInfo[] = "/proc/cpuinfo";
+  const char kDirProc[] = "/proc/";
+
+  std::vector<BrokerFilePermission> permissions;
+  if (recursive)
+    permissions.push_back(BrokerFilePermission::ReadOnlyRecursive(kDirProc));
+  else
+    permissions.push_back(BrokerFilePermission::ReadOnly(kFileCpuInfo));
+
+  scoped_ptr<BrokerProcess> open_broker(
+      new BrokerProcess(EPERM, permissions, fast_check_in_client));
+  ASSERT_TRUE(open_broker->Init(base::Bind(&NoOpCallback)));
+
+  // A read-write open of a read-only entry must be refused.
+  int fd = -1;
+  fd = open_broker->Open(kFileCpuInfo, O_RDWR);
+  base::ScopedFD fd_closer(fd);
+  ASSERT_EQ(fd, -EPERM);
+
+  // Check we can read /proc/cpuinfo.
+  int can_access = open_broker->Access(kFileCpuInfo, R_OK);
+  ASSERT_EQ(can_access, 0);
+  // Check we can not write /proc/cpuinfo.
+  can_access = open_broker->Access(kFileCpuInfo, W_OK);
+  ASSERT_EQ(can_access, -EPERM);
+
+  // Open cpuinfo via the broker.
+  int cpuinfo_fd = open_broker->Open(kFileCpuInfo, O_RDONLY);
+  base::ScopedFD cpuinfo_fd_closer(cpuinfo_fd);
+  ASSERT_GE(cpuinfo_fd, 0);
+  char buf[3];
+  memset(buf, 0, sizeof(buf));
+  int read_len1 = read(cpuinfo_fd, buf, sizeof(buf));
+  ASSERT_GT(read_len1, 0);
+
+  // Open cpuinfo directly.
+  int cpuinfo_fd2 = open(kFileCpuInfo, O_RDONLY);
+  base::ScopedFD cpuinfo_fd2_closer(cpuinfo_fd2);
+  ASSERT_GE(cpuinfo_fd2, 0);
+  char buf2[3];
+  // Fill with a different byte than |buf| so equality cannot come from the
+  // initial contents.
+  memset(buf2, 1, sizeof(buf2));
+  int read_len2 = read(cpuinfo_fd2, buf2, sizeof(buf2));
+  // Validate the second read itself (this used to re-check read_len1).
+  ASSERT_GT(read_len2, 0);
+
+  // The following is not guaranteed true, but will be in practice.
+  ASSERT_EQ(read_len1, read_len2);
+  // Compare the cpuinfo as returned by the broker with the one we opened
+  // ourselves.
+  ASSERT_EQ(memcmp(buf, buf2, read_len1), 0);
+
+  ASSERT_TRUE(TestUtils::CurrentProcessHasChildren());
+  open_broker.reset();
+  ASSERT_FALSE(TestUtils::CurrentProcessHasChildren());
+}
+
+// Run this test 4 times: with and without the check in the client,
+// and with and without a recursive path.
+TEST(BrokerProcess, OpenCpuinfoWithClientCheck) {
+ TestOpenCpuinfo(true /* fast_check_in_client */, false /* not recursive */);
+ // Keep this body to the single call: a fatal ASSERT_* only returns from the
+ // subfunction, so anything added after the call would still run.
+}
+
+TEST(BrokerProcess, OpenCpuinfoNoClientCheck) {
+ TestOpenCpuinfo(false /* fast_check_in_client */, false /* not recursive */);
+ // Keep this body to the single call: a fatal ASSERT_* only returns from the
+ // subfunction, so anything added after the call would still run.
+}
+
+TEST(BrokerProcess, OpenCpuinfoWithClientCheckRecursive) {
+ TestOpenCpuinfo(true /* fast_check_in_client */, true /* recursive */);
+ // Keep this body to the single call: a fatal ASSERT_* only returns from the
+ // subfunction, so anything added after the call would still run.
+}
+
+TEST(BrokerProcess, OpenCpuinfoNoClientCheckRecursive) {
+ TestOpenCpuinfo(false /* fast_check_in_client */, true /* recursive */);
+ // Keep this body to the single call: a fatal ASSERT_* only returns from the
+ // subfunction, so anything added after the call would still run.
+}
+
+// Writes through a read-write descriptor obtained from the broker and reads
+// the same bytes back through the temporary file's own descriptor.
+TEST(BrokerProcess, OpenFileRW) {
+  ScopedTemporaryFile tempfile;
+  const char* const path = tempfile.full_file_name();
+
+  std::vector<BrokerFilePermission> permissions;
+  permissions.push_back(BrokerFilePermission::ReadWrite(path));
+
+  BrokerProcess open_broker(EPERM, permissions);
+  ASSERT_TRUE(open_broker.Init(base::Bind(&NoOpCallback)));
+
+  // Check we can access that file with read or write.
+  const int access_result = open_broker.Access(path, R_OK | W_OK);
+  ASSERT_EQ(access_result, 0);
+
+  const int broker_fd = open_broker.Open(path, O_RDWR);
+  ASSERT_GE(broker_fd, 0);
+
+  // Write to the descriptor opened by the broker. The terminating NUL is
+  // part of the payload, since sizeof() is used for the length.
+  char payload[] = "TESTTESTTEST";
+  const ssize_t payload_size = static_cast<ssize_t>(sizeof(payload));
+  const ssize_t written = write(broker_fd, payload, sizeof(payload));
+  ASSERT_EQ(written, payload_size);
+
+  // Read back from the original file descriptor what we wrote through
+  // the descriptor provided by the broker.
+  char readback[1024];
+  const ssize_t read_count = read(tempfile.fd(), readback, sizeof(readback));
+  ASSERT_EQ(read_count, payload_size);
+  ASSERT_EQ(memcmp(payload, readback, sizeof(payload)), 0);
+
+  ASSERT_EQ(close(broker_fd), 0);
+}
+
+// SANDBOX_TEST because the process could die with a SIGPIPE
+// and we want this to happen in a subprocess.
+// Kills the broker and checks that later Open()/Access() calls fail with
+// -ENOMEM instead of taking the client down.
+SANDBOX_TEST(BrokerProcess, BrokerDied) {
+  const char kCpuInfo[] = "/proc/cpuinfo";
+  std::vector<BrokerFilePermission> permissions;
+  permissions.push_back(BrokerFilePermission::ReadOnly(kCpuInfo));
+
+  BrokerProcess open_broker(EPERM, permissions, true /* fast_check_in_client */,
+                            true /* quiet_failures_for_tests */);
+  SANDBOX_ASSERT(open_broker.Init(base::Bind(&NoOpCallback)));
+  const pid_t broker_pid = open_broker.broker_pid();
+  SANDBOX_ASSERT(kill(broker_pid, SIGKILL) == 0);
+
+  // Now we check that the broker has been signaled, but do not reap it.
+  // WNOWAIT leaves the child waitable for ~BrokerProcess().
+  siginfo_t process_info;
+  SANDBOX_ASSERT(HANDLE_EINTR(waitid(
+                     P_PID, broker_pid, &process_info, WEXITED | WNOWAIT)) ==
+                 0);
+  SANDBOX_ASSERT(broker_pid == process_info.si_pid);
+  SANDBOX_ASSERT(CLD_KILLED == process_info.si_code);
+  SANDBOX_ASSERT(SIGKILL == process_info.si_status);
+
+  // Check that doing Open with a dead broker won't SIGPIPE us.
+  SANDBOX_ASSERT(open_broker.Open(kCpuInfo, O_RDONLY) == -ENOMEM);
+  // Access() takes an access() mode, not an open() flag. F_OK has the same
+  // numeric value as the O_RDONLY that was passed here before, so the request
+  // sent is unchanged.
+  SANDBOX_ASSERT(open_broker.Access(kCpuInfo, F_OK) == -ENOMEM);
+}
+
+// Opens /proc/cpuinfo through a broker three times -- plain O_RDONLY, with
+// O_CLOEXEC and with O_NONBLOCK -- and checks the flags observed on each
+// returned descriptor. |fast_check_in_client| is forwarded to the
+// BrokerProcess constructor.
+void TestOpenComplexFlags(bool fast_check_in_client) {
+  const char kCpuInfo[] = "/proc/cpuinfo";
+  std::vector<BrokerFilePermission> permissions;
+  permissions.push_back(BrokerFilePermission::ReadOnly(kCpuInfo));
+
+  BrokerProcess open_broker(EPERM, permissions, fast_check_in_client);
+  ASSERT_TRUE(open_broker.Init(base::Bind(&NoOpCallback)));
+
+  // Case 1: no extra flags requested, so neither O_CLOEXEC nor O_NONBLOCK
+  // may show up in the file status flags.
+  const int plain_fd = open_broker.Open(kCpuInfo, O_RDONLY);
+  ASSERT_GE(plain_fd, 0);
+  const int plain_status = fcntl(plain_fd, F_GETFL);
+  ASSERT_NE(-1, plain_status);
+  ASSERT_EQ(0, plain_status & (O_CLOEXEC | O_NONBLOCK));
+  ASSERT_EQ(0, close(plain_fd));
+
+  // Case 2: O_CLOEXEC. This has to be queried with F_GETFD rather than
+  // F_GETFL, because the O_CLOEXEC flag in F_GETFL is actually not used by
+  // the kernel.
+  const int cloexec_fd = open_broker.Open(kCpuInfo, O_RDONLY | O_CLOEXEC);
+  ASSERT_GE(cloexec_fd, 0);
+  const int descriptor_flags = fcntl(cloexec_fd, F_GETFD);
+  ASSERT_NE(-1, descriptor_flags);
+  ASSERT_TRUE(FD_CLOEXEC & descriptor_flags);
+  ASSERT_EQ(0, close(cloexec_fd));
+
+  // Case 3: O_NONBLOCK must be visible in the file status flags.
+  const int nonblock_fd = open_broker.Open(kCpuInfo, O_RDONLY | O_NONBLOCK);
+  ASSERT_GE(nonblock_fd, 0);
+  const int nonblock_status = fcntl(nonblock_fd, F_GETFL);
+  ASSERT_NE(-1, nonblock_status);
+  ASSERT_TRUE(O_NONBLOCK & nonblock_status);
+  ASSERT_EQ(0, close(nonblock_fd));
+}
+
+TEST(BrokerProcess, OpenComplexFlagsWithClientCheck) {
+  // Nothing else is done after the helper call, so that the ASSERTs inside
+  // the helper work as expected.
+  const bool kFastCheckInClient = true;
+  TestOpenComplexFlags(kFastCheckInClient);
+}
+
+TEST(BrokerProcess, OpenComplexFlagsNoClientCheck) {
+  // Nothing else is done after the helper call, so that the ASSERTs inside
+  // the helper work as expected.
+  const bool kFastCheckInClient = false;
+  TestOpenComplexFlags(kFastCheckInClient);
+}
+
+// Sends the broker a series of bogus IPC messages, each carrying a file
+// descriptor, under a lowered descriptor limit, and then checks that a
+// regular Open() through the broker still succeeds.
+//
+// We need to allow noise because the broker will log when it receives our
+// bogus IPCs.
+SANDBOX_TEST_ALLOW_NOISE(BrokerProcess, RecvMsgDescriptorLeak) {
+  // Android creates a socket on first use of the LOG call, so log once up
+  // front to make sure that socket is open before the test begins.
+  LOG(INFO) << "Ensure Android LOG socket is allocated";
+
+  // Two pipe() calls yield the four lowest available file descriptors.
+  int available_fds[4];
+  SANDBOX_ASSERT(0 == pipe(available_fds));
+  SANDBOX_ASSERT(0 == pipe(available_fds + 2));
+
+  // Hold on to the first descriptor (it is sent to the broker later) and
+  // close the remaining ones.
+  base::ScopedFD message_fd(available_fds[0]);
+  const size_t kNumFds = arraysize(available_fds);
+  for (size_t index = 1; index < kNumFds; ++index) {
+    SANDBOX_ASSERT(0 == IGNORE_EINTR(close(available_fds[index])));
+  }
+
+  // Lower our file descriptor limit to just allow three more file descriptors
+  // to be allocated. (N.B., RLIMIT_NOFILE doesn't limit the number of file
+  // descriptors a process can have: it only limits the highest value that can
+  // be assigned to newly-created descriptors allocated by the process.)
+  const int highest_fd =
+      *std::max_element(available_fds, available_fds + kNumFds);
+  const rlim_t fd_limit = 1 + highest_fd;
+
+  // Only the soft limit is changed, because Valgrind doesn't allow changing
+  // the hard descriptor limit.
+  struct rlimit nofile_limit;
+  SANDBOX_ASSERT(0 == getrlimit(RLIMIT_NOFILE, &nofile_limit));
+  SANDBOX_ASSERT(fd_limit <= nofile_limit.rlim_cur);
+  nofile_limit.rlim_cur = fd_limit;
+  SANDBOX_ASSERT(0 == setrlimit(RLIMIT_NOFILE, &nofile_limit));
+
+  static const char kCpuInfo[] = "/proc/cpuinfo";
+  std::vector<BrokerFilePermission> permissions;
+  permissions.push_back(BrokerFilePermission::ReadOnly(kCpuInfo));
+
+  BrokerProcess open_broker(EPERM, permissions);
+  SANDBOX_ASSERT(open_broker.Init(base::Bind(&NoOpCallback)));
+
+  const int ipc_fd = BrokerProcessTestHelper::GetIPCDescriptor(&open_broker);
+  SANDBOX_ASSERT(ipc_fd >= 0);
+
+  static const char kBogus[] = "not a pickle";
+  std::vector<int> fds(1, message_fd.get());
+
+  // The broker process should only have a couple spare file descriptors
+  // available, but for good measure we send it fd_limit bogus IPCs anyway.
+  for (rlim_t remaining = fd_limit; remaining > 0; --remaining) {
+    SANDBOX_ASSERT(
+        base::UnixDomainSocket::SendMsg(ipc_fd, kBogus, sizeof(kBogus), fds));
+  }
+
+  // A well-formed request must still be served after the bogus messages.
+  const int cpuinfo_fd = open_broker.Open(kCpuInfo, O_RDONLY);
+  SANDBOX_ASSERT(cpuinfo_fd >= 0);
+  SANDBOX_ASSERT(0 == IGNORE_EINTR(close(cpuinfo_fd)));
+}
+
+// Closes |fd|, aborting via PCHECK if close() reports a failure. Always
+// returns true, so the function can be bound as a bool-returning callback.
+bool CloseFD(int fd) {
+  const int close_result = IGNORE_EINTR(close(fd));
+  PCHECK(0 == close_result);
+  return true;
+}
+
+// Returns true if the other end of the |reader| pipe was closed, false if
+// |timeout_in_ms| (milliseconds, passed straight to poll()) elapsed or
+// another event or error occurred.
+bool WaitForClosedPipeWriter(int reader, int timeout_in_ms) {
+  struct pollfd poll_fd = {reader, POLLIN | POLLRDHUP, 0};
+  const int num_events = HANDLE_EINTR(poll(&poll_fd, 1, timeout_in_ms));
+  // The hang-up bit must be tested with '&'. An '|' here would always be
+  // non-zero and would report any single event (e.g. POLLIN or POLLERR) as a
+  // closed writer.
+  return 1 == num_events && (poll_fd.revents & POLLHUP) != 0;
+}
+
+// Closing the broker client's IPC channel should terminate the broker
+// process. A "lifeline" pipe is used to observe the broker's death: once the
+// test has closed its own copy of the write end, the read end is polled for
+// the writer going away.
+TEST(BrokerProcess, BrokerDiesOnClosedChannel) {
+  std::vector<BrokerFilePermission> permissions;
+  permissions.push_back(BrokerFilePermission::ReadOnly("/proc/cpuinfo"));
+
+  // Get the writing end of a pipe into the broker (child) process so
+  // that we can reliably detect when it dies.
+  int lifeline_fds[2];
+  PCHECK(0 == pipe(lifeline_fds));
+  const int lifeline_read_end = lifeline_fds[0];
+  const int lifeline_write_end = lifeline_fds[1];
+
+  BrokerProcess open_broker(EPERM, permissions, true /* fast_check_in_client */,
+                            false /* quiet_failures_for_tests */);
+  // The Init callback closes the read end.
+  ASSERT_TRUE(open_broker.Init(base::Bind(&CloseFD, lifeline_read_end)));
+  // Make sure the writing end only exists in the broker process.
+  CloseFD(lifeline_write_end);
+  base::ScopedFD reader(lifeline_read_end);
+
+  const pid_t broker_pid = open_broker.broker_pid();
+
+  // This should cause the broker process to exit.
+  BrokerProcessTestHelper::CloseChannel(&open_broker);
+
+  // If the broker exited, its lifeline fd should be closed.
+  const int kTimeoutInMilliseconds = 5000;
+  ASSERT_TRUE(WaitForClosedPipeWriter(reader.get(), kTimeoutInMilliseconds));
+
+  // Now check that the broker has exited, but do not reap it.
+  siginfo_t process_info;
+  const int wait_result = HANDLE_EINTR(
+      waitid(P_PID, broker_pid, &process_info, WEXITED | WNOWAIT));
+  ASSERT_EQ(0, wait_result);
+  EXPECT_EQ(broker_pid, process_info.si_pid);
+  EXPECT_EQ(CLD_EXITED, process_info.si_code);
+  EXPECT_EQ(1, process_info.si_status);
+}
+
+// Exercises file creation through the broker: O_CREAT without O_EXCL is
+// rejected with -EPERM, an exclusive create succeeds, a second exclusive
+// create fails with -EEXIST, and data written through the broker's descriptor
+// can be read back with a direct open().
+TEST(BrokerProcess, CreateFile) {
+  // Obtain a path name from a short-lived temporary file object. The file is
+  // presumably removed when the object leaves scope, since the exclusive
+  // create below is expected to succeed.
+  std::string created_path;
+  {
+    ScopedTemporaryFile name_source;
+    created_path = name_source.full_file_name();
+  }
+  const char* const tempfile_name = created_path.c_str();
+
+  std::vector<BrokerFilePermission> permissions;
+  permissions.push_back(BrokerFilePermission::ReadWriteCreate(tempfile_name));
+
+  BrokerProcess open_broker(EPERM, permissions);
+  ASSERT_TRUE(open_broker.Init(base::Bind(&NoOpCallback)));
+
+  // Creating without O_EXCL is expected to be denied.
+  const int non_exclusive_fd =
+      open_broker.Open(tempfile_name, O_RDWR | O_CREAT);
+  ASSERT_EQ(non_exclusive_fd, -EPERM);
+
+  const char kTestText[] = "TESTTESTTEST";
+
+  // Exclusive creation is expected to work.
+  const int created_fd =
+      open_broker.Open(tempfile_name, O_RDWR | O_CREAT | O_EXCL);
+  ASSERT_GE(created_fd, 0);
+  {
+    base::ScopedFD writer(created_fd);
+
+    // Now that the file exists, a second exclusive create has to fail.
+    const int duplicate_fd =
+        open_broker.Open(tempfile_name, O_RDWR | O_CREAT | O_EXCL);
+    ASSERT_EQ(duplicate_fd, -EEXIST);
+
+    // Write to the descriptor opened by the broker.
+    const ssize_t bytes_written =
+        HANDLE_EINTR(write(created_fd, kTestText, sizeof(kTestText)));
+    ASSERT_EQ(bytes_written, static_cast<ssize_t>(sizeof(kTestText)));
+  }
+
+  // Re-open the file directly, without the broker, and compare its contents.
+  const int verify_fd = open(tempfile_name, O_RDONLY);
+  ASSERT_GE(verify_fd, 0);
+  {
+    base::ScopedFD verifier(verify_fd);
+    char readback[1024];
+    const ssize_t bytes_read =
+        HANDLE_EINTR(read(verify_fd, readback, sizeof(readback)));
+
+    ASSERT_EQ(bytes_read, static_cast<ssize_t>(sizeof(kTestText)));
+    ASSERT_EQ(memcmp(kTestText, readback, sizeof(kTestText)), 0);
+  }
+}
+
+} // namespace syscall_broker
+
+} // namespace sandbox