diff options
author | Alex Vakulenko <avakulenko@google.com> | 2016-01-22 16:52:43 -0800 |
---|---|---|
committer | Alex Vakulenko <avakulenko@google.com> | 2016-01-22 17:02:32 -0800 |
commit | f6024733c0d1eed88f68520b5e6a20b96e212ad6 (patch) | |
tree | fed955593d9995a027a33cd46b41260882ddd3ea /sandbox | |
parent | 340b8dd38ca56de409ca4f790b45b1c314c544dd (diff) | |
download | libchrome-f6024733c0d1eed88f68520b5e6a20b96e212ad6.tar.gz |
libchrome: Revert deleted files needed by Chrome OS
Some of the "unused" files were apparently needed for building libchrome
on Chrome OS. Reverting deletion of these files.
Change-Id: I02e32f112d16480206f43ca0087342a9de7f1e1b
Diffstat (limited to 'sandbox')
81 files changed, 13980 insertions, 0 deletions
diff --git a/sandbox/linux/bpf_dsl/bpf_dsl.cc b/sandbox/linux/bpf_dsl/bpf_dsl.cc new file mode 100644 index 0000000000..3a35903ec9 --- /dev/null +++ b/sandbox/linux/bpf_dsl/bpf_dsl.cc @@ -0,0 +1,363 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/bpf_dsl/bpf_dsl.h" + +#include <limits> + +#include "base/logging.h" +#include "base/memory/ref_counted.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl_impl.h" +#include "sandbox/linux/bpf_dsl/policy_compiler.h" +#include "sandbox/linux/seccomp-bpf/die.h" +#include "sandbox/linux/seccomp-bpf/errorcode.h" + +namespace sandbox { +namespace bpf_dsl { +namespace { + +intptr_t BPFFailure(const struct arch_seccomp_data&, void* aux) { + SANDBOX_DIE(static_cast<char*>(aux)); +} + +class AllowResultExprImpl : public internal::ResultExprImpl { + public: + AllowResultExprImpl() {} + + ErrorCode Compile(PolicyCompiler* pc) const override { + return ErrorCode(ErrorCode::ERR_ALLOWED); + } + + bool IsAllow() const override { return true; } + + private: + ~AllowResultExprImpl() override {} + + DISALLOW_COPY_AND_ASSIGN(AllowResultExprImpl); +}; + +class ErrorResultExprImpl : public internal::ResultExprImpl { + public: + explicit ErrorResultExprImpl(int err) : err_(err) { + CHECK(err_ >= ErrorCode::ERR_MIN_ERRNO && err_ <= ErrorCode::ERR_MAX_ERRNO); + } + + ErrorCode Compile(PolicyCompiler* pc) const override { + return pc->Error(err_); + } + + bool IsDeny() const override { return true; } + + private: + ~ErrorResultExprImpl() override {} + + int err_; + + DISALLOW_COPY_AND_ASSIGN(ErrorResultExprImpl); +}; + +class TraceResultExprImpl : public internal::ResultExprImpl { + public: + TraceResultExprImpl(uint16_t aux) : aux_(aux) {} + + ErrorCode Compile(PolicyCompiler* pc) const override { + return ErrorCode(ErrorCode::ERR_TRACE + aux_); + } + + private: + ~TraceResultExprImpl() override {} + + uint16_t aux_; + + DISALLOW_COPY_AND_ASSIGN(TraceResultExprImpl); +}; + +class TrapResultExprImpl : public internal::ResultExprImpl { + public: + TrapResultExprImpl(TrapRegistry::TrapFnc func, const void* arg, bool safe) + : func_(func), arg_(arg), safe_(safe) { + DCHECK(func_); + } + + ErrorCode Compile(PolicyCompiler* pc) const override { + return pc->Trap(func_, arg_, safe_); + } + + bool HasUnsafeTraps() const override { return safe_ == false; } + + bool IsDeny() const override { return true; } + + private: + ~TrapResultExprImpl() override {} + + TrapRegistry::TrapFnc func_; + const void* arg_; + bool safe_; + + DISALLOW_COPY_AND_ASSIGN(TrapResultExprImpl); +}; + +class IfThenResultExprImpl : public internal::ResultExprImpl { + public: + IfThenResultExprImpl(const BoolExpr& cond, + const ResultExpr& then_result, + const ResultExpr& else_result) + : cond_(cond), then_result_(then_result), else_result_(else_result) {} + + ErrorCode Compile(PolicyCompiler* pc) const override { + return cond_->Compile( + pc, then_result_->Compile(pc), else_result_->Compile(pc)); + } + + bool HasUnsafeTraps() const override { + return then_result_->HasUnsafeTraps() || else_result_->HasUnsafeTraps(); + } + + private: + ~IfThenResultExprImpl() override {} + + BoolExpr cond_; + ResultExpr then_result_; + ResultExpr else_result_; + + DISALLOW_COPY_AND_ASSIGN(IfThenResultExprImpl); +}; + +class ConstBoolExprImpl : public internal::BoolExprImpl { + public: + ConstBoolExprImpl(bool value) : value_(value) {} + + ErrorCode Compile(PolicyCompiler* pc, + ErrorCode true_ec, + ErrorCode false_ec) const override { + return value_ ? true_ec : false_ec; + } + + private: + ~ConstBoolExprImpl() override {} + + bool value_; + + DISALLOW_COPY_AND_ASSIGN(ConstBoolExprImpl); +}; + +class PrimitiveBoolExprImpl : public internal::BoolExprImpl { + public: + PrimitiveBoolExprImpl(int argno, + ErrorCode::ArgType is_32bit, + uint64_t mask, + uint64_t value) + : argno_(argno), is_32bit_(is_32bit), mask_(mask), value_(value) {} + + ErrorCode Compile(PolicyCompiler* pc, + ErrorCode true_ec, + ErrorCode false_ec) const override { + return pc->CondMaskedEqual( + argno_, is_32bit_, mask_, value_, true_ec, false_ec); + } + + private: + ~PrimitiveBoolExprImpl() override {} + + int argno_; + ErrorCode::ArgType is_32bit_; + uint64_t mask_; + uint64_t value_; + + DISALLOW_COPY_AND_ASSIGN(PrimitiveBoolExprImpl); +}; + +class NegateBoolExprImpl : public internal::BoolExprImpl { + public: + explicit NegateBoolExprImpl(const BoolExpr& cond) : cond_(cond) {} + + ErrorCode Compile(PolicyCompiler* pc, + ErrorCode true_ec, + ErrorCode false_ec) const override { + return cond_->Compile(pc, false_ec, true_ec); + } + + private: + ~NegateBoolExprImpl() override {} + + BoolExpr cond_; + + DISALLOW_COPY_AND_ASSIGN(NegateBoolExprImpl); +}; + +class AndBoolExprImpl : public internal::BoolExprImpl { + public: + AndBoolExprImpl(const BoolExpr& lhs, const BoolExpr& rhs) + : lhs_(lhs), rhs_(rhs) {} + + ErrorCode Compile(PolicyCompiler* pc, + ErrorCode true_ec, + ErrorCode false_ec) const override { + return lhs_->Compile(pc, rhs_->Compile(pc, true_ec, false_ec), false_ec); + } + + private: + ~AndBoolExprImpl() override {} + + BoolExpr lhs_; + BoolExpr rhs_; + + DISALLOW_COPY_AND_ASSIGN(AndBoolExprImpl); +}; + +class OrBoolExprImpl : public internal::BoolExprImpl { + public: + OrBoolExprImpl(const BoolExpr& lhs, const BoolExpr& rhs) + : lhs_(lhs), rhs_(rhs) {} + + ErrorCode Compile(PolicyCompiler* pc, + ErrorCode true_ec, + ErrorCode false_ec) const override { + return lhs_->Compile(pc, true_ec, rhs_->Compile(pc, true_ec, false_ec)); + } + + private: + ~OrBoolExprImpl() override {} + + BoolExpr lhs_; + BoolExpr rhs_; + + DISALLOW_COPY_AND_ASSIGN(OrBoolExprImpl); +}; + +} // namespace + +namespace internal { + +bool ResultExprImpl::HasUnsafeTraps() const { + return false; +} + +bool ResultExprImpl::IsAllow() const { + return false; +} + +bool ResultExprImpl::IsDeny() const { + return false; +} + +uint64_t DefaultMask(size_t size) { + switch (size) { + case 4: + return std::numeric_limits<uint32_t>::max(); + case 8: + return std::numeric_limits<uint64_t>::max(); + default: + CHECK(false) << "Unimplemented DefaultMask case"; + return 0; + } +} + +BoolExpr ArgEq(int num, size_t size, uint64_t mask, uint64_t val) { + CHECK(size == 4 || size == 8); + + // TODO(mdempsky): Should we just always use TP_64BIT? + const ErrorCode::ArgType arg_type = + (size == 4) ? ErrorCode::TP_32BIT : ErrorCode::TP_64BIT; + + return BoolExpr(new const PrimitiveBoolExprImpl(num, arg_type, mask, val)); +} + +} // namespace internal + +ResultExpr Allow() { + return ResultExpr(new const AllowResultExprImpl()); +} + +ResultExpr Error(int err) { + return ResultExpr(new const ErrorResultExprImpl(err)); +} + +ResultExpr Kill(const char* msg) { + return Trap(BPFFailure, msg); +} + +ResultExpr Trace(uint16_t aux) { + return ResultExpr(new const TraceResultExprImpl(aux)); +} + +ResultExpr Trap(TrapRegistry::TrapFnc trap_func, const void* aux) { + return ResultExpr( + new const TrapResultExprImpl(trap_func, aux, true /* safe */)); +} + +ResultExpr UnsafeTrap(TrapRegistry::TrapFnc trap_func, const void* aux) { + return ResultExpr( + new const TrapResultExprImpl(trap_func, aux, false /* unsafe */)); +} + +BoolExpr BoolConst(bool value) { + return BoolExpr(new const ConstBoolExprImpl(value)); +} + +BoolExpr operator!(const BoolExpr& cond) { + return BoolExpr(new const NegateBoolExprImpl(cond)); +} + +BoolExpr operator&&(const BoolExpr& lhs, const BoolExpr& rhs) { + return BoolExpr(new const AndBoolExprImpl(lhs, rhs)); +} + +BoolExpr operator||(const BoolExpr& lhs, const BoolExpr& rhs) { + return BoolExpr(new const OrBoolExprImpl(lhs, rhs)); +} + +Elser If(const BoolExpr& cond, const ResultExpr& then_result) { + return Elser(nullptr).ElseIf(cond, then_result); +} + +Elser::Elser(cons::List<Clause> clause_list) : clause_list_(clause_list) { +} + +Elser::Elser(const Elser& elser) : clause_list_(elser.clause_list_) { +} + +Elser::~Elser() { +} + +Elser Elser::ElseIf(const BoolExpr& cond, const ResultExpr& then_result) const { + return Elser(Cons(std::make_pair(cond, then_result), clause_list_)); +} + +ResultExpr Elser::Else(const ResultExpr& else_result) const { + // We finally have the default result expression for this + // if/then/else sequence. Also, we've already accumulated all + // if/then pairs into a list of reverse order (i.e., lower priority + // conditions are listed before higher priority ones). E.g., an + // expression like + // + // If(b1, e1).ElseIf(b2, e2).ElseIf(b3, e3).Else(e4) + // + // will have built up a list like + // + // [(b3, e3), (b2, e2), (b1, e1)]. + // + // Now that we have e4, we can walk the list and create a ResultExpr + // tree like: + // + // expr = e4 + // expr = (b3 ? e3 : expr) = (b3 ? e3 : e4) + // expr = (b2 ? e2 : expr) = (b2 ? e2 : (b3 ? e3 : e4)) + // expr = (b1 ? e1 : expr) = (b1 ? e1 : (b2 ? e2 : (b3 ? e3 : e4))) + // + // and end up with an appropriately chained tree. + + ResultExpr expr = else_result; + for (const Clause& clause : clause_list_) { + expr = ResultExpr( + new const IfThenResultExprImpl(clause.first, clause.second, expr)); + } + return expr; +} + +} // namespace bpf_dsl +} // namespace sandbox + +template class scoped_refptr<const sandbox::bpf_dsl::internal::BoolExprImpl>; +template class scoped_refptr<const sandbox::bpf_dsl::internal::ResultExprImpl>; diff --git a/sandbox/linux/bpf_dsl/bpf_dsl.h b/sandbox/linux/bpf_dsl/bpf_dsl.h new file mode 100644 index 0000000000..365e9b5466 --- /dev/null +++ b/sandbox/linux/bpf_dsl/bpf_dsl.h @@ -0,0 +1,317 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_BPF_DSL_BPF_DSL_H_ +#define SANDBOX_LINUX_BPF_DSL_BPF_DSL_H_ + +#include <stdint.h> + +#include <utility> +#include <vector> + +#include "base/macros.h" +#include "base/memory/ref_counted.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl_forward.h" +#include "sandbox/linux/bpf_dsl/cons.h" +#include "sandbox/linux/bpf_dsl/trap_registry.h" +#include "sandbox/sandbox_export.h" + +// The sandbox::bpf_dsl namespace provides a domain-specific language +// to make writing BPF policies more expressive. In general, the +// object types all have value semantics (i.e., they can be copied +// around, returned from or passed to function calls, etc. without any +// surprising side effects), though not all support assignment. +// +// An idiomatic and demonstrative (albeit silly) example of this API +// would be: +// +// #include "sandbox/linux/bpf_dsl/bpf_dsl.h" +// +// using namespace sandbox::bpf_dsl; +// +// class SillyPolicy : public Policy { +// public: +// SillyPolicy() {} +// ~SillyPolicy() override {} +// ResultExpr EvaluateSyscall(int sysno) const override { +// if (sysno == __NR_fcntl) { +// Arg<int> fd(0), cmd(1); +// Arg<unsigned long> flags(2); +// const uint64_t kGoodFlags = O_ACCMODE | O_NONBLOCK; +// return If(fd == 0 && cmd == F_SETFL && (flags & ~kGoodFlags) == 0, +// Allow()) +// .ElseIf(cmd == F_DUPFD || cmd == F_DUPFD_CLOEXEC, +// Error(EMFILE)) +// .Else(Trap(SetFlagHandler, NULL)); +// } else { +// return Allow(); +// } +// } +// +// private: +// DISALLOW_COPY_AND_ASSIGN(SillyPolicy); +// }; +// +// More generally, the DSL currently supports the following grammar: +// +// result = Allow() | Error(errno) | Kill(msg) | Trace(aux) +// | Trap(trap_func, aux) | UnsafeTrap(trap_func, aux) +// | If(bool, result)[.ElseIf(bool, result)].Else(result) +// | Switch(arg)[.Case(val, result)].Default(result) +// bool = BoolConst(boolean) | !bool | bool && bool | bool || bool +// | arg == val | arg != val +// arg = Arg<T>(num) | arg & mask +// +// The semantics of each function and operator are intended to be +// intuitive, but are described in more detail below. +// +// (Credit to Sean Parent's "Inheritance is the Base Class of Evil" +// talk at Going Native 2013 for promoting value semantics via shared +// pointers to immutable state.) + +namespace sandbox { +namespace bpf_dsl { + +// ResultExpr is an opaque reference to an immutable result expression tree. +typedef scoped_refptr<const internal::ResultExprImpl> ResultExpr; + +// BoolExpr is an opaque reference to an immutable boolean expression tree. +typedef scoped_refptr<const internal::BoolExprImpl> BoolExpr; + +// Allow specifies a result that the system call should be allowed to +// execute normally. +SANDBOX_EXPORT ResultExpr Allow(); + +// Error specifies a result that the system call should fail with +// error number |err|. As a special case, Error(0) will result in the +// system call appearing to have succeeded, but without having any +// side effects. +SANDBOX_EXPORT ResultExpr Error(int err); + +// Kill specifies a result to kill the program and print an error message. +SANDBOX_EXPORT ResultExpr Kill(const char* msg); + +// Trace specifies a result to notify a tracing process via the +// PTRACE_EVENT_SECCOMP event and allow it to change or skip the system call. +// The value of |aux| will be available to the tracer via PTRACE_GETEVENTMSG. +SANDBOX_EXPORT ResultExpr Trace(uint16_t aux); + +// Trap specifies a result that the system call should be handled by +// trapping back into userspace and invoking |trap_func|, passing +// |aux| as the second parameter. +SANDBOX_EXPORT ResultExpr + Trap(TrapRegistry::TrapFnc trap_func, const void* aux); + +// UnsafeTrap is like Trap, except the policy is marked as "unsafe" +// and allowed to use SandboxSyscall to invoke any system call. +// +// NOTE: This feature, by definition, disables all security features of +// the sandbox. It should never be used in production, but it can be +// very useful to diagnose code that is incompatible with the sandbox. +// If even a single system call returns "UnsafeTrap", the security of +// entire sandbox should be considered compromised. +SANDBOX_EXPORT ResultExpr + UnsafeTrap(TrapRegistry::TrapFnc trap_func, const void* aux); + +// BoolConst converts a bool value into a BoolExpr. +SANDBOX_EXPORT BoolExpr BoolConst(bool value); + +// Various ways to combine boolean expressions into more complex expressions. +// They follow standard boolean algebra laws. +SANDBOX_EXPORT BoolExpr operator!(const BoolExpr& cond); +SANDBOX_EXPORT BoolExpr operator&&(const BoolExpr& lhs, const BoolExpr& rhs); +SANDBOX_EXPORT BoolExpr operator||(const BoolExpr& lhs, const BoolExpr& rhs); + +template <typename T> +class SANDBOX_EXPORT Arg { + public: + // Initializes the Arg to represent the |num|th system call + // argument (indexed from 0), which is of type |T|. + explicit Arg(int num); + + Arg(const Arg& arg) : num_(arg.num_), mask_(arg.mask_) {} + + // Returns an Arg representing the current argument, but after + // bitwise-and'ing it with |rhs|. + friend Arg operator&(const Arg& lhs, uint64_t rhs) { + return Arg(lhs.num_, lhs.mask_ & rhs); + } + + // Returns a boolean expression comparing whether the system call argument + // (after applying any bitmasks, if appropriate) equals |rhs|. + friend BoolExpr operator==(const Arg& lhs, T rhs) { return lhs.EqualTo(rhs); } + + // Returns a boolean expression comparing whether the system call argument + // (after applying any bitmasks, if appropriate) does not equal |rhs|. + friend BoolExpr operator!=(const Arg& lhs, T rhs) { return !(lhs == rhs); } + + private: + Arg(int num, uint64_t mask) : num_(num), mask_(mask) {} + + BoolExpr EqualTo(T val) const; + + int num_; + uint64_t mask_; + + DISALLOW_ASSIGN(Arg); +}; + +// If begins a conditional result expression predicated on the +// specified boolean expression. +SANDBOX_EXPORT Elser If(const BoolExpr& cond, const ResultExpr& then_result); + +class SANDBOX_EXPORT Elser { + public: + Elser(const Elser& elser); + ~Elser(); + + // ElseIf extends the conditional result expression with another + // "if then" clause, predicated on the specified boolean expression. + Elser ElseIf(const BoolExpr& cond, const ResultExpr& then_result) const; + + // Else terminates a conditional result expression using |else_result| as + // the default fallback result expression. + ResultExpr Else(const ResultExpr& else_result) const; + + private: + typedef std::pair<BoolExpr, ResultExpr> Clause; + + explicit Elser(cons::List<Clause> clause_list); + + cons::List<Clause> clause_list_; + + friend Elser If(const BoolExpr&, const ResultExpr&); + template <typename T> + friend Caser<T> Switch(const Arg<T>&); + DISALLOW_ASSIGN(Elser); +}; + +// Switch begins a switch expression dispatched according to the +// specified argument value. +template <typename T> +SANDBOX_EXPORT Caser<T> Switch(const Arg<T>& arg); + +template <typename T> +class SANDBOX_EXPORT Caser { + public: + Caser(const Caser<T>& caser) : arg_(caser.arg_), elser_(caser.elser_) {} + ~Caser() {} + + // Case adds a single-value "case" clause to the switch. + Caser<T> Case(T value, ResultExpr result) const; + + // Cases adds a multiple-value "case" clause to the switch. + // See also the SANDBOX_BPF_DSL_CASES macro below for a more idiomatic way + // of using this function. + Caser<T> Cases(const std::vector<T>& values, ResultExpr result) const; + + // Terminate the switch with a "default" clause. + ResultExpr Default(ResultExpr result) const; + + private: + Caser(const Arg<T>& arg, Elser elser) : arg_(arg), elser_(elser) {} + + Arg<T> arg_; + Elser elser_; + + template <typename U> + friend Caser<U> Switch(const Arg<U>&); + DISALLOW_ASSIGN(Caser); +}; + +// Recommended usage is to put +// #define CASES SANDBOX_BPF_DSL_CASES +// near the top of the .cc file (e.g., nearby any "using" statements), then +// use like: +// Switch(arg).CASES((3, 5, 7), result)...; +#define SANDBOX_BPF_DSL_CASES(values, result) \ + Cases(SANDBOX_BPF_DSL_CASES_HELPER values, result) + +// Helper macro to construct a std::vector from an initializer list. +// TODO(mdempsky): Convert to use C++11 initializer lists instead. +#define SANDBOX_BPF_DSL_CASES_HELPER(value, ...) \ + ({ \ + const __typeof__(value) bpf_dsl_cases_values[] = {value, __VA_ARGS__}; \ + std::vector<__typeof__(value)>( \ + bpf_dsl_cases_values, \ + bpf_dsl_cases_values + arraysize(bpf_dsl_cases_values)); \ + }) + +// ===================================================================== +// Official API ends here. +// ===================================================================== + +namespace internal { + +// Make argument-dependent lookup work. This is necessary because although +// BoolExpr is defined in bpf_dsl, since it's merely a typedef for +// scoped_refptr<const internal::BoolExplImpl>, argument-dependent lookup only +// searches the "internal" nested namespace. +using bpf_dsl::operator!; +using bpf_dsl::operator||; +using bpf_dsl::operator&&; + +// Returns a boolean expression that represents whether system call +// argument |num| of size |size| is equal to |val|, when masked +// according to |mask|. Users should use the Arg template class below +// instead of using this API directly. +SANDBOX_EXPORT BoolExpr + ArgEq(int num, size_t size, uint64_t mask, uint64_t val); + +// Returns the default mask for a system call argument of the specified size. +SANDBOX_EXPORT uint64_t DefaultMask(size_t size); + +} // namespace internal + +template <typename T> +Arg<T>::Arg(int num) + : num_(num), mask_(internal::DefaultMask(sizeof(T))) { +} + +// Definition requires ArgEq to have been declared. Moved out-of-line +// to minimize how much internal clutter users have to ignore while +// reading the header documentation. +// +// Additionally, we use this helper member function to avoid linker errors +// caused by defining operator== out-of-line. For a more detailed explanation, +// see http://www.parashift.com/c++-faq-lite/template-friends.html. +template <typename T> +BoolExpr Arg<T>::EqualTo(T val) const { + return internal::ArgEq(num_, sizeof(T), mask_, static_cast<uint64_t>(val)); +} + +template <typename T> +SANDBOX_EXPORT Caser<T> Switch(const Arg<T>& arg) { + return Caser<T>(arg, Elser(nullptr)); +} + +template <typename T> +Caser<T> Caser<T>::Case(T value, ResultExpr result) const { + return SANDBOX_BPF_DSL_CASES((value), result); +} + +template <typename T> +Caser<T> Caser<T>::Cases(const std::vector<T>& values, + ResultExpr result) const { + // Theoretically we could evaluate arg_ just once and emit a more efficient + // dispatch table, but for now we simply translate into an equivalent + // If/ElseIf/Else chain. + + typedef typename std::vector<T>::const_iterator Iter; + BoolExpr test = BoolConst(false); + for (Iter i = values.begin(), end = values.end(); i != end; ++i) { + test = test || (arg_ == *i); + } + return Caser<T>(arg_, elser_.ElseIf(test, result)); +} + +template <typename T> +ResultExpr Caser<T>::Default(ResultExpr result) const { + return elser_.Else(result); +} + +} // namespace bpf_dsl +} // namespace sandbox + +#endif // SANDBOX_LINUX_BPF_DSL_BPF_DSL_H_ diff --git a/sandbox/linux/bpf_dsl/bpf_dsl_unittest.cc b/sandbox/linux/bpf_dsl/bpf_dsl_unittest.cc new file mode 100644 index 0000000000..398ec59ef1 --- /dev/null +++ b/sandbox/linux/bpf_dsl/bpf_dsl_unittest.cc @@ -0,0 +1,486 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/bpf_dsl/bpf_dsl.h" + +#include <errno.h> +#include <fcntl.h> +#include <netinet/in.h> +#include <sys/socket.h> +#include <sys/syscall.h> +#include <sys/utsname.h> +#include <unistd.h> + +#include <map> +#include <utility> + +#include "base/files/scoped_file.h" +#include "base/macros.h" +#include "build/build_config.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl_impl.h" +#include "sandbox/linux/bpf_dsl/codegen.h" +#include "sandbox/linux/bpf_dsl/policy.h" +#include "sandbox/linux/bpf_dsl/policy_compiler.h" +#include "sandbox/linux/bpf_dsl/seccomp_macros.h" +#include "sandbox/linux/bpf_dsl/trap_registry.h" +#include "sandbox/linux/bpf_dsl/verifier.h" +#include "sandbox/linux/seccomp-bpf/errorcode.h" +#include "sandbox/linux/system_headers/linux_filter.h" +#include "testing/gtest/include/gtest/gtest.h" + +#define CASES SANDBOX_BPF_DSL_CASES + +namespace sandbox { +namespace bpf_dsl { +namespace { + +// Helper function to construct fake arch_seccomp_data objects. +struct arch_seccomp_data FakeSyscall(int nr, + uint64_t p0 = 0, + uint64_t p1 = 0, + uint64_t p2 = 0, + uint64_t p3 = 0, + uint64_t p4 = 0, + uint64_t p5 = 0) { + // Made up program counter for syscall address. + const uint64_t kFakePC = 0x543210; + + struct arch_seccomp_data data = { + nr, + SECCOMP_ARCH, + kFakePC, + { + p0, p1, p2, p3, p4, p5, + }, + }; + + return data; +} + +class FakeTrapRegistry : public TrapRegistry { + public: + FakeTrapRegistry() : map_() {} + virtual ~FakeTrapRegistry() {} + + uint16_t Add(TrapFnc fnc, const void* aux, bool safe) override { + EXPECT_TRUE(safe); + + const uint16_t next_id = map_.size() + 1; + return map_.insert(std::make_pair(Key(fnc, aux), next_id)).first->second; + } + + bool EnableUnsafeTraps() override { + ADD_FAILURE() << "Unimplemented"; + return false; + } + + private: + using Key = std::pair<TrapFnc, const void*>; + + std::map<Key, uint16_t> map_; + + DISALLOW_COPY_AND_ASSIGN(FakeTrapRegistry); +}; + +intptr_t FakeTrapFuncOne(const arch_seccomp_data& data, void* aux) { return 1; } +intptr_t FakeTrapFuncTwo(const arch_seccomp_data& data, void* aux) { return 2; } + +// Test that FakeTrapRegistry correctly assigns trap IDs to trap handlers. +TEST(FakeTrapRegistry, TrapIDs) { + struct { + TrapRegistry::TrapFnc fnc; + const void* aux; + } funcs[] = { + {FakeTrapFuncOne, nullptr}, + {FakeTrapFuncTwo, nullptr}, + {FakeTrapFuncOne, funcs}, + {FakeTrapFuncTwo, funcs}, + }; + + FakeTrapRegistry traps; + + // Add traps twice to test that IDs are reused correctly. + for (int i = 0; i < 2; ++i) { + for (size_t j = 0; j < arraysize(funcs); ++j) { + // Trap IDs start at 1. + EXPECT_EQ(j + 1, traps.Add(funcs[j].fnc, funcs[j].aux, true)); + } + } +} + +class PolicyEmulator { + public: + explicit PolicyEmulator(const Policy* policy) : program_(), traps_() { + program_ = *PolicyCompiler(policy, &traps_).Compile(true /* verify */); + } + ~PolicyEmulator() {} + + uint32_t Emulate(const struct arch_seccomp_data& data) const { + const char* err = nullptr; + uint32_t res = Verifier::EvaluateBPF(program_, data, &err); + if (err) { + ADD_FAILURE() << err; + return 0; + } + return res; + } + + void ExpectAllow(const struct arch_seccomp_data& data) const { + EXPECT_EQ(SECCOMP_RET_ALLOW, Emulate(data)); + } + + void ExpectErrno(uint16_t err, const struct arch_seccomp_data& data) const { + EXPECT_EQ(SECCOMP_RET_ERRNO | err, Emulate(data)); + } + + private: + CodeGen::Program program_; + FakeTrapRegistry traps_; + + DISALLOW_COPY_AND_ASSIGN(PolicyEmulator); +}; + +class BasicPolicy : public Policy { + public: + BasicPolicy() {} + ~BasicPolicy() override {} + ResultExpr EvaluateSyscall(int sysno) const override { + if (sysno == __NR_getpgid) { + const Arg<pid_t> pid(0); + return If(pid == 0, Error(EPERM)).Else(Error(EINVAL)); + } + if (sysno == __NR_setuid) { + const Arg<uid_t> uid(0); + return If(uid != 42, Error(ESRCH)).Else(Error(ENOMEM)); + } + return Allow(); + } + + private: + DISALLOW_COPY_AND_ASSIGN(BasicPolicy); +}; + +TEST(BPFDSL, Basic) { + BasicPolicy policy; + PolicyEmulator emulator(&policy); + + emulator.ExpectErrno(EPERM, FakeSyscall(__NR_getpgid, 0)); + emulator.ExpectErrno(EINVAL, FakeSyscall(__NR_getpgid, 1)); + + emulator.ExpectErrno(ENOMEM, FakeSyscall(__NR_setuid, 42)); + emulator.ExpectErrno(ESRCH, FakeSyscall(__NR_setuid, 43)); +} + +/* On IA-32, socketpair() is implemented via socketcall(). :-( */ +#if !defined(ARCH_CPU_X86) +class BooleanLogicPolicy : public Policy { + public: + BooleanLogicPolicy() {} + ~BooleanLogicPolicy() override {} + ResultExpr EvaluateSyscall(int sysno) const override { + if (sysno == __NR_socketpair) { + const Arg<int> domain(0), type(1), protocol(2); + return If(domain == AF_UNIX && + (type == SOCK_STREAM || type == SOCK_DGRAM) && + protocol == 0, + Error(EPERM)).Else(Error(EINVAL)); + } + return Allow(); + } + + private: + DISALLOW_COPY_AND_ASSIGN(BooleanLogicPolicy); +}; + +TEST(BPFDSL, BooleanLogic) { + BooleanLogicPolicy policy; + PolicyEmulator emulator(&policy); + + const intptr_t kFakeSV = 0x12345; + + // Acceptable combinations that should return EPERM. + emulator.ExpectErrno( + EPERM, FakeSyscall(__NR_socketpair, AF_UNIX, SOCK_STREAM, 0, kFakeSV)); + emulator.ExpectErrno( + EPERM, FakeSyscall(__NR_socketpair, AF_UNIX, SOCK_DGRAM, 0, kFakeSV)); + + // Combinations that are invalid for only one reason; should return EINVAL. + emulator.ExpectErrno( + EINVAL, FakeSyscall(__NR_socketpair, AF_INET, SOCK_STREAM, 0, kFakeSV)); + emulator.ExpectErrno(EINVAL, FakeSyscall(__NR_socketpair, AF_UNIX, + SOCK_SEQPACKET, 0, kFakeSV)); + emulator.ExpectErrno(EINVAL, FakeSyscall(__NR_socketpair, AF_UNIX, + SOCK_STREAM, IPPROTO_TCP, kFakeSV)); + + // Completely unacceptable combination; should also return EINVAL. + emulator.ExpectErrno( + EINVAL, FakeSyscall(__NR_socketpair, AF_INET, SOCK_SEQPACKET, IPPROTO_UDP, + kFakeSV)); +} +#endif // !ARCH_CPU_X86 + +class MoreBooleanLogicPolicy : public Policy { + public: + MoreBooleanLogicPolicy() {} + ~MoreBooleanLogicPolicy() override {} + ResultExpr EvaluateSyscall(int sysno) const override { + if (sysno == __NR_setresuid) { + const Arg<uid_t> ruid(0), euid(1), suid(2); + return If(ruid == 0 || euid == 0 || suid == 0, Error(EPERM)) + .ElseIf(ruid == 1 && euid == 1 && suid == 1, Error(EAGAIN)) + .Else(Error(EINVAL)); + } + return Allow(); + } + + private: + DISALLOW_COPY_AND_ASSIGN(MoreBooleanLogicPolicy); +}; + +TEST(BPFDSL, MoreBooleanLogic) { + MoreBooleanLogicPolicy policy; + PolicyEmulator emulator(&policy); + + // Expect EPERM if any set to 0. + emulator.ExpectErrno(EPERM, FakeSyscall(__NR_setresuid, 0, 5, 5)); + emulator.ExpectErrno(EPERM, FakeSyscall(__NR_setresuid, 5, 0, 5)); + emulator.ExpectErrno(EPERM, FakeSyscall(__NR_setresuid, 5, 5, 0)); + + // Expect EAGAIN if all set to 1. + emulator.ExpectErrno(EAGAIN, FakeSyscall(__NR_setresuid, 1, 1, 1)); + + // Expect EINVAL for anything else. + emulator.ExpectErrno(EINVAL, FakeSyscall(__NR_setresuid, 5, 1, 1)); + emulator.ExpectErrno(EINVAL, FakeSyscall(__NR_setresuid, 1, 5, 1)); + emulator.ExpectErrno(EINVAL, FakeSyscall(__NR_setresuid, 1, 1, 5)); + emulator.ExpectErrno(EINVAL, FakeSyscall(__NR_setresuid, 3, 4, 5)); +} + +static const uintptr_t kDeadBeefAddr = + static_cast<uintptr_t>(0xdeadbeefdeadbeefULL); + +class ArgSizePolicy : public Policy { + public: + ArgSizePolicy() {} + ~ArgSizePolicy() override {} + ResultExpr EvaluateSyscall(int sysno) const override { + if (sysno == __NR_uname) { + const Arg<uintptr_t> addr(0); + return If(addr == kDeadBeefAddr, Error(EPERM)).Else(Allow()); + } + return Allow(); + } + + private: + DISALLOW_COPY_AND_ASSIGN(ArgSizePolicy); +}; + +TEST(BPFDSL, ArgSizeTest) { + ArgSizePolicy policy; + PolicyEmulator emulator(&policy); + + emulator.ExpectAllow(FakeSyscall(__NR_uname, 0)); + emulator.ExpectErrno(EPERM, FakeSyscall(__NR_uname, kDeadBeefAddr)); +} + +#if 0 +// TODO(mdempsky): This is really an integration test. + +class TrappingPolicy : public Policy { + public: + TrappingPolicy() {} + ~TrappingPolicy() override {} + ResultExpr EvaluateSyscall(int sysno) const override { + if (sysno == __NR_uname) { + return Trap(UnameTrap, &count_); + } + return Allow(); + } + + private: + static intptr_t count_; + + static intptr_t UnameTrap(const struct arch_seccomp_data& data, void* aux) { + BPF_ASSERT_EQ(&count_, aux); + return ++count_; + } + + DISALLOW_COPY_AND_ASSIGN(TrappingPolicy); +}; + +intptr_t TrappingPolicy::count_; + +BPF_TEST_C(BPFDSL, TrapTest, TrappingPolicy) { + ASSERT_SYSCALL_RESULT(1, uname, NULL); + ASSERT_SYSCALL_RESULT(2, uname, NULL); + ASSERT_SYSCALL_RESULT(3, uname, NULL); +} +#endif + +class MaskingPolicy : public Policy { + public: + MaskingPolicy() {} + ~MaskingPolicy() override {} + ResultExpr EvaluateSyscall(int sysno) const override { + if (sysno == __NR_setuid) { + const Arg<uid_t> uid(0); + return If((uid & 0xf) == 0, Error(EINVAL)).Else(Error(EACCES)); + } + if (sysno == __NR_setgid) { + const Arg<gid_t> gid(0); + return If((gid & 0xf0) == 0xf0, Error(EINVAL)).Else(Error(EACCES)); + } + if (sysno == __NR_setpgid) { + const Arg<pid_t> pid(0); + return If((pid & 0xa5) == 0xa0, Error(EINVAL)).Else(Error(EACCES)); + } + return Allow(); + } + + private: + DISALLOW_COPY_AND_ASSIGN(MaskingPolicy); +}; + +TEST(BPFDSL, MaskTest) { + MaskingPolicy policy; + PolicyEmulator emulator(&policy); + + for (uid_t uid = 0; uid < 0x100; ++uid) { + const int expect_errno = (uid & 0xf) == 0 ? EINVAL : EACCES; + emulator.ExpectErrno(expect_errno, FakeSyscall(__NR_setuid, uid)); + } + + for (gid_t gid = 0; gid < 0x100; ++gid) { + const int expect_errno = (gid & 0xf0) == 0xf0 ? EINVAL : EACCES; + emulator.ExpectErrno(expect_errno, FakeSyscall(__NR_setgid, gid)); + } + + for (pid_t pid = 0; pid < 0x100; ++pid) { + const int expect_errno = (pid & 0xa5) == 0xa0 ? EINVAL : EACCES; + emulator.ExpectErrno(expect_errno, FakeSyscall(__NR_setpgid, pid, 0)); + } +} + +class ElseIfPolicy : public Policy { + public: + ElseIfPolicy() {} + ~ElseIfPolicy() override {} + ResultExpr EvaluateSyscall(int sysno) const override { + if (sysno == __NR_setuid) { + const Arg<uid_t> uid(0); + return If((uid & 0xfff) == 0, Error(0)) + .ElseIf((uid & 0xff0) == 0, Error(EINVAL)) + .ElseIf((uid & 0xf00) == 0, Error(EEXIST)) + .Else(Error(EACCES)); + } + return Allow(); + } + + private: + DISALLOW_COPY_AND_ASSIGN(ElseIfPolicy); +}; + +TEST(BPFDSL, ElseIfTest) { + ElseIfPolicy policy; + PolicyEmulator emulator(&policy); + + emulator.ExpectErrno(0, FakeSyscall(__NR_setuid, 0)); + + emulator.ExpectErrno(EINVAL, FakeSyscall(__NR_setuid, 0x0001)); + emulator.ExpectErrno(EINVAL, FakeSyscall(__NR_setuid, 0x0002)); + + emulator.ExpectErrno(EEXIST, FakeSyscall(__NR_setuid, 0x0011)); + emulator.ExpectErrno(EEXIST, FakeSyscall(__NR_setuid, 0x0022)); + + emulator.ExpectErrno(EACCES, FakeSyscall(__NR_setuid, 0x0111)); + emulator.ExpectErrno(EACCES, FakeSyscall(__NR_setuid, 0x0222)); +} + +class SwitchPolicy : public Policy { + public: + SwitchPolicy() {} + ~SwitchPolicy() override {} + ResultExpr EvaluateSyscall(int sysno) const override { + if (sysno == __NR_fcntl) { + const Arg<int> cmd(1); + const Arg<unsigned long> long_arg(2); + return Switch(cmd) + .CASES((F_GETFL, F_GETFD), Error(ENOENT)) + .Case(F_SETFD, If(long_arg == O_CLOEXEC, Allow()).Else(Error(EINVAL))) + .Case(F_SETFL, Error(EPERM)) + .Default(Error(EACCES)); + } + return Allow(); + } + + private: + DISALLOW_COPY_AND_ASSIGN(SwitchPolicy); +}; + +TEST(BPFDSL, SwitchTest) { + SwitchPolicy policy; + PolicyEmulator emulator(&policy); + + const int kFakeSockFD = 42; + + emulator.ExpectErrno(ENOENT, FakeSyscall(__NR_fcntl, kFakeSockFD, F_GETFD)); + emulator.ExpectErrno(ENOENT, FakeSyscall(__NR_fcntl, kFakeSockFD, F_GETFL)); + + emulator.ExpectAllow( + FakeSyscall(__NR_fcntl, kFakeSockFD, F_SETFD, O_CLOEXEC)); + emulator.ExpectErrno(EINVAL, + FakeSyscall(__NR_fcntl, kFakeSockFD, F_SETFD, 0)); + + emulator.ExpectErrno(EPERM, + FakeSyscall(__NR_fcntl, kFakeSockFD, F_SETFL, O_RDONLY)); + + emulator.ExpectErrno(EACCES, + FakeSyscall(__NR_fcntl, kFakeSockFD, F_DUPFD, 0)); +} + +static intptr_t DummyTrap(const struct arch_seccomp_data& data, void* aux) { + return 0; +} + +TEST(BPFDSL, IsAllowDeny) { + ResultExpr allow = Allow(); + EXPECT_TRUE(allow->IsAllow()); + EXPECT_FALSE(allow->IsDeny()); + + ResultExpr error = Error(ENOENT); + EXPECT_FALSE(error->IsAllow()); + EXPECT_TRUE(error->IsDeny()); + + ResultExpr trace = Trace(42); + EXPECT_FALSE(trace->IsAllow()); + EXPECT_FALSE(trace->IsDeny()); + + ResultExpr trap = Trap(DummyTrap, nullptr); + EXPECT_FALSE(trap->IsAllow()); + EXPECT_TRUE(trap->IsDeny()); + + const Arg<int> arg(0); + ResultExpr maybe = If(arg == 0, Allow()).Else(Error(EPERM)); + EXPECT_FALSE(maybe->IsAllow()); + EXPECT_FALSE(maybe->IsDeny()); +} + +TEST(BPFDSL, HasUnsafeTraps) { + ResultExpr allow = Allow(); + EXPECT_FALSE(allow->HasUnsafeTraps()); + + ResultExpr safe = Trap(DummyTrap, nullptr); + EXPECT_FALSE(safe->HasUnsafeTraps()); + + ResultExpr unsafe = UnsafeTrap(DummyTrap, nullptr); + EXPECT_TRUE(unsafe->HasUnsafeTraps()); + + const Arg<int> arg(0); + ResultExpr maybe = If(arg == 0, allow).Else(unsafe); + EXPECT_TRUE(maybe->HasUnsafeTraps()); +} + +} // namespace +} // namespace bpf_dsl +} // namespace sandbox diff --git a/sandbox/linux/bpf_dsl/codegen.cc b/sandbox/linux/bpf_dsl/codegen.cc new file mode 100644 index 0000000000..2d5c8e406e --- /dev/null +++ b/sandbox/linux/bpf_dsl/codegen.cc @@ -0,0 +1,159 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/bpf_dsl/codegen.h" + +#include <limits> +#include <utility> + +#include "base/logging.h" +#include "sandbox/linux/system_headers/linux_filter.h" + +// This CodeGen implementation strives for simplicity while still +// generating acceptable BPF programs under typical usage patterns +// (e.g., by PolicyCompiler). +// +// The key to its simplicity is that BPF programs only support forward +// jumps/branches, which allows constraining the DAG construction API +// to make instruction nodes immutable. Immutable nodes admits a +// simple greedy approach of emitting new instructions as needed and +// then reusing existing ones that have already been emitted. This +// cleanly avoids any need to compute basic blocks or apply +// topological sorting because the API effectively sorts instructions +// for us (e.g., before MakeInstruction() can be called to emit a +// branch instruction, it must have already been called for each +// branch path). +// +// This greedy algorithm is not without (theoretical) weakness though: +// +// 1. In the general case, we don't eliminate dead code. If needed, +// we could trace back through the program in Compile() and elide +// any unneeded instructions, but in practice we only emit live +// instructions anyway. +// +// 2. By not dividing instructions into basic blocks and sorting, we +// lose an opportunity to move non-branch/non-return instructions +// adjacent to their successor instructions, which means we might +// need to emit additional jumps. But in practice, they'll +// already be nearby as long as callers don't go out of their way +// to interleave MakeInstruction() calls for unrelated code +// sequences. + +namespace sandbox { + +// kBranchRange is the maximum value that can be stored in +// sock_filter's 8-bit jt and jf fields. +const size_t kBranchRange = std::numeric_limits<uint8_t>::max(); + +const CodeGen::Node CodeGen::kNullNode; + +CodeGen::CodeGen() : program_(), equivalent_(), memos_() { +} + +CodeGen::~CodeGen() { +} + +void CodeGen::Compile(CodeGen::Node head, Program* out) { + DCHECK(out); + out->assign(program_.rbegin() + Offset(head), program_.rend()); +} + +CodeGen::Node CodeGen::MakeInstruction(uint16_t code, + uint32_t k, + Node jt, + Node jf) { + // To avoid generating redundant code sequences, we memoize the + // results from AppendInstruction(). + auto res = memos_.insert(std::make_pair(MemoKey(code, k, jt, jf), kNullNode)); + CodeGen::Node* node = &res.first->second; + if (res.second) { // Newly inserted memo entry. + *node = AppendInstruction(code, k, jt, jf); + } + return *node; +} + +CodeGen::Node CodeGen::AppendInstruction(uint16_t code, + uint32_t k, + Node jt, + Node jf) { + if (BPF_CLASS(code) == BPF_JMP) { + CHECK_NE(BPF_JA, BPF_OP(code)) << "CodeGen inserts JAs as needed"; + + // Optimally adding jumps is rather tricky, so we use a quick + // approximation: by artificially reducing |jt|'s range, |jt| will + // stay within its true range even if we add a jump for |jf|. + jt = WithinRange(jt, kBranchRange - 1); + jf = WithinRange(jf, kBranchRange); + return Append(code, k, Offset(jt), Offset(jf)); + } + + CHECK_EQ(kNullNode, jf) << "Non-branch instructions shouldn't provide jf"; + if (BPF_CLASS(code) == BPF_RET) { + CHECK_EQ(kNullNode, jt) << "Return instructions shouldn't provide jt"; + } else { + // For non-branch/non-return instructions, execution always + // proceeds to the next instruction; so we need to arrange for + // that to be |jt|. + jt = WithinRange(jt, 0); + CHECK_EQ(0U, Offset(jt)) << "ICE: Failed to setup next instruction"; + } + return Append(code, k, 0, 0); +} + +CodeGen::Node CodeGen::WithinRange(Node target, size_t range) { + // Just use |target| if it's already within range. + if (Offset(target) <= range) { + return target; + } + + // Alternatively, look for an equivalent instruction within range. + if (Offset(equivalent_.at(target)) <= range) { + return equivalent_.at(target); + } + + // Otherwise, fall back to emitting a jump instruction. + Node jump = Append(BPF_JMP | BPF_JA, Offset(target), 0, 0); + equivalent_.at(target) = jump; + return jump; +} + +CodeGen::Node CodeGen::Append(uint16_t code, uint32_t k, size_t jt, size_t jf) { + if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) { + CHECK_LE(jt, kBranchRange); + CHECK_LE(jf, kBranchRange); + } else { + CHECK_EQ(0U, jt); + CHECK_EQ(0U, jf); + } + + CHECK_LT(program_.size(), static_cast<size_t>(BPF_MAXINSNS)); + CHECK_EQ(program_.size(), equivalent_.size()); + + Node res = program_.size(); + program_.push_back(sock_filter{ + code, static_cast<uint8_t>(jt), static_cast<uint8_t>(jf), k}); + equivalent_.push_back(res); + return res; +} + +size_t CodeGen::Offset(Node target) const { + CHECK_LT(target, program_.size()) << "Bogus offset target node"; + return (program_.size() - 1) - target; +} + +// TODO(mdempsky): Move into a general base::Tuple helper library. +bool CodeGen::MemoKeyLess::operator()(const MemoKey& lhs, + const MemoKey& rhs) const { + if (base::get<0>(lhs) != base::get<0>(rhs)) + return base::get<0>(lhs) < base::get<0>(rhs); + if (base::get<1>(lhs) != base::get<1>(rhs)) + return base::get<1>(lhs) < base::get<1>(rhs); + if (base::get<2>(lhs) != base::get<2>(rhs)) + return base::get<2>(lhs) < base::get<2>(rhs); + if (base::get<3>(lhs) != base::get<3>(rhs)) + return base::get<3>(lhs) < base::get<3>(rhs); + return false; +} + +} // namespace sandbox diff --git a/sandbox/linux/bpf_dsl/codegen.h b/sandbox/linux/bpf_dsl/codegen.h new file mode 100644 index 0000000000..9d898030b9 --- /dev/null +++ b/sandbox/linux/bpf_dsl/codegen.h @@ -0,0 +1,123 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_BPF_DSL_CODEGEN_H__ +#define SANDBOX_LINUX_BPF_DSL_CODEGEN_H__ + +#include <stddef.h> +#include <stdint.h> + +#include <map> +#include <vector> + +#include "base/macros.h" +#include "base/tuple.h" +#include "sandbox/sandbox_export.h" + +struct sock_filter; + +namespace sandbox { + +// The code generator implements a basic assembler that can convert a +// graph of BPF instructions into a well-formed array of BPF +// instructions. Most notably, it ensures that jumps are always +// forward and don't exceed the limit of 255 instructions imposed by +// the instruction set. +// +// Callers would typically create a new CodeGen object and then use it +// to build a DAG of instruction nodes. They'll eventually call +// Compile() to convert this DAG to a Program. +// +// CodeGen gen; +// CodeGen::Node allow, branch, dag; +// +// allow = +// gen.MakeInstruction(BPF_RET+BPF_K, +// ErrorCode(ErrorCode::ERR_ALLOWED).err())); +// branch = +// gen.MakeInstruction(BPF_JMP+BPF_EQ+BPF_K, __NR_getpid, +// Trap(GetPidHandler, NULL), allow); +// dag = +// gen.MakeInstruction(BPF_LD+BPF_W+BPF_ABS, +// offsetof(struct arch_seccomp_data, nr), branch); +// +// // Simplified code follows; in practice, it is important to avoid calling +// // any C++ destructors after starting the sandbox. +// CodeGen::Program program; +// gen.Compile(dag, program); +// const struct sock_fprog prog = { +// static_cast<unsigned short>(program->size()), &program[0] }; +// prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); +// +class SANDBOX_EXPORT CodeGen { + public: + // A vector of BPF instructions that need to be installed as a filter + // program in the kernel. + typedef std::vector<struct sock_filter> Program; + + // Node represents a node within the instruction DAG being compiled. + using Node = Program::size_type; + + // kNullNode represents the "null" node; i.e., the reserved node + // value guaranteed to not equal any actual nodes. + static const Node kNullNode = -1; + + CodeGen(); + ~CodeGen(); + + // MakeInstruction creates a node representing the specified + // instruction, or returns and existing equivalent node if one + // exists. For details on the possible parameters refer to + // https://www.kernel.org/doc/Documentation/networking/filter.txt. + // TODO(mdempsky): Reconsider using default arguments here. + Node MakeInstruction(uint16_t code, + uint32_t k, + Node jt = kNullNode, + Node jf = kNullNode); + + // Compile linearizes the instruction DAG rooted at |head| into a + // program that can be executed by a BPF virtual machine. + void Compile(Node head, Program* program); + + private: + using MemoKey = base::Tuple<uint16_t, uint32_t, Node, Node>; + struct MemoKeyLess { + bool operator()(const MemoKey& lhs, const MemoKey& rhs) const; + }; + + // AppendInstruction adds a new instruction, ensuring that |jt| and + // |jf| are within range as necessary for |code|. + Node AppendInstruction(uint16_t code, uint32_t k, Node jt, Node jf); + + // WithinRange returns a node equivalent to |next| that is at most + // |range| instructions away from the (logical) beginning of the + // program. + Node WithinRange(Node next, size_t range); + + // Append appends a new instruction to the physical end (i.e., + // logical beginning) of |program_|. + Node Append(uint16_t code, uint32_t k, size_t jt, size_t jf); + + // Offset returns how many instructions exist in |program_| after |target|. + size_t Offset(Node target) const; + + // NOTE: program_ is the compiled program in *reverse*, so that + // indices remain stable as we add instructions. + Program program_; + + // equivalent_ stores the most recent semantically-equivalent node for each + // instruction in program_. A node is defined as semantically-equivalent to N + // if it has the same instruction code and constant as N and its successor + // nodes (if any) are semantically-equivalent to N's successor nodes, or + // if it's an unconditional jump to a node semantically-equivalent to N. + std::vector<Node> equivalent_; + + std::map<MemoKey, Node, MemoKeyLess> memos_; + + DISALLOW_COPY_AND_ASSIGN(CodeGen); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_BPF_DSL_CODEGEN_H__ diff --git a/sandbox/linux/bpf_dsl/codegen_unittest.cc b/sandbox/linux/bpf_dsl/codegen_unittest.cc new file mode 100644 index 0000000000..5961822123 --- /dev/null +++ b/sandbox/linux/bpf_dsl/codegen_unittest.cc @@ -0,0 +1,402 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/bpf_dsl/codegen.h" + +#include <map> +#include <utility> +#include <vector> + +#include "base/macros.h" +#include "base/md5.h" +#include "base/strings/string_piece.h" +#include "sandbox/linux/system_headers/linux_filter.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace sandbox { +namespace { + +// Hash provides an abstraction for building "hash trees" from BPF +// control flow graphs, and efficiently identifying equivalent graphs. +// +// For simplicity, we use MD5, because base happens to provide a +// convenient API for its use. However, any collision-resistant hash +// should suffice. +class Hash { + public: + static const Hash kZero; + + Hash() : digest_() {} + + Hash(uint16_t code, + uint32_t k, + const Hash& jt = kZero, + const Hash& jf = kZero) + : digest_() { + base::MD5Context ctx; + base::MD5Init(&ctx); + HashValue(&ctx, code); + HashValue(&ctx, k); + HashValue(&ctx, jt); + HashValue(&ctx, jf); + base::MD5Final(&digest_, &ctx); + } + + Hash(const Hash& hash) = default; + Hash& operator=(const Hash& rhs) = default; + + friend bool operator==(const Hash& lhs, const Hash& rhs) { + return lhs.Base16() == rhs.Base16(); + } + friend bool operator!=(const Hash& lhs, const Hash& rhs) { + return !(lhs == rhs); + } + + private: + template <typename T> + void HashValue(base::MD5Context* ctx, const T& value) { + base::MD5Update(ctx, + base::StringPiece(reinterpret_cast<const char*>(&value), + sizeof(value))); + } + + std::string Base16() const { + return base::MD5DigestToBase16(digest_); + } + + base::MD5Digest digest_; +}; + +const Hash Hash::kZero; + +// Sanity check that equality and inequality work on Hash as required. +TEST(CodeGen, HashSanity) { + std::vector<Hash> hashes; + + // Push a bunch of logically distinct hashes. + hashes.push_back(Hash::kZero); + for (int i = 0; i < 4; ++i) { + hashes.push_back(Hash(i & 1, i & 2)); + } + for (int i = 0; i < 16; ++i) { + hashes.push_back(Hash(i & 1, i & 2, Hash(i & 4, i & 8))); + } + for (int i = 0; i < 64; ++i) { + hashes.push_back( + Hash(i & 1, i & 2, Hash(i & 4, i & 8), Hash(i & 16, i & 32))); + } + + for (const Hash& a : hashes) { + for (const Hash& b : hashes) { + // Hashes should equal themselves, but not equal all others. + if (&a == &b) { + EXPECT_EQ(a, b); + } else { + EXPECT_NE(a, b); + } + } + } +} + +// ProgramTest provides a fixture for writing compiling sample +// programs with CodeGen and verifying the linearized output matches +// the input DAG. +class ProgramTest : public ::testing::Test { + protected: + ProgramTest() : gen_(), node_hashes_() {} + + // MakeInstruction calls CodeGen::MakeInstruction() and associated + // the returned address with a hash of the instruction. + CodeGen::Node MakeInstruction(uint16_t code, + uint32_t k, + CodeGen::Node jt = CodeGen::kNullNode, + CodeGen::Node jf = CodeGen::kNullNode) { + CodeGen::Node res = gen_.MakeInstruction(code, k, jt, jf); + EXPECT_NE(CodeGen::kNullNode, res); + + Hash digest(code, k, Lookup(jt), Lookup(jf)); + auto it = node_hashes_.insert(std::make_pair(res, digest)); + EXPECT_EQ(digest, it.first->second); + + return res; + } + + // RunTest compiles the program and verifies that the output matches + // what is expected. It should be called at the end of each program + // test case. + void RunTest(CodeGen::Node head) { + // Compile the program + CodeGen::Program program; + gen_.Compile(head, &program); + + // Walk the program backwards, and compute the hash for each instruction. + std::vector<Hash> prog_hashes(program.size()); + for (size_t i = program.size(); i > 0; --i) { + const sock_filter& insn = program.at(i - 1); + Hash& hash = prog_hashes.at(i - 1); + + if (BPF_CLASS(insn.code) == BPF_JMP) { + if (BPF_OP(insn.code) == BPF_JA) { + // The compiler adds JA instructions as needed, so skip them. + hash = prog_hashes.at(i + insn.k); + } else { + hash = Hash(insn.code, insn.k, prog_hashes.at(i + insn.jt), + prog_hashes.at(i + insn.jf)); + } + } else if (BPF_CLASS(insn.code) == BPF_RET) { + hash = Hash(insn.code, insn.k); + } else { + hash = Hash(insn.code, insn.k, prog_hashes.at(i)); + } + } + + EXPECT_EQ(Lookup(head), prog_hashes.at(0)); + } + + private: + const Hash& Lookup(CodeGen::Node next) const { + if (next == CodeGen::kNullNode) { + return Hash::kZero; + } + auto it = node_hashes_.find(next); + if (it == node_hashes_.end()) { + ADD_FAILURE() << "No hash found for node " << next; + return Hash::kZero; + } + return it->second; + } + + CodeGen gen_; + std::map<CodeGen::Node, Hash> node_hashes_; + + DISALLOW_COPY_AND_ASSIGN(ProgramTest); +}; + +TEST_F(ProgramTest, OneInstruction) { + // Create the most basic valid BPF program: + // RET 0 + CodeGen::Node head = MakeInstruction(BPF_RET + BPF_K, 0); + RunTest(head); +} + +TEST_F(ProgramTest, SimpleBranch) { + // Create a program with a single branch: + // JUMP if eq 42 then $0 else $1 + // 0: RET 1 + // 1: RET 0 + CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, + MakeInstruction(BPF_RET + BPF_K, 1), + MakeInstruction(BPF_RET + BPF_K, 0)); + RunTest(head); +} + +TEST_F(ProgramTest, AtypicalBranch) { + // Create a program with a single branch: + // JUMP if eq 42 then $0 else $0 + // 0: RET 0 + + CodeGen::Node ret = MakeInstruction(BPF_RET + BPF_K, 0); + CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret); + + // N.B.: As the instructions in both sides of the branch are already + // the same object, we do not actually have any "mergeable" branches. + // This needs to be reflected in our choice of "flags". + RunTest(head); +} + +TEST_F(ProgramTest, Complex) { + // Creates a basic BPF program that we'll use to test some of the code: + // JUMP if eq 42 the $0 else $1 (insn6) + // 0: LD 23 (insn5) + // 1: JUMP if eq 42 then $2 else $4 (insn4) + // 2: JUMP to $3 (insn2) + // 3: LD 42 (insn1) + // RET 42 (insn0) + // 4: LD 42 (insn3) + // RET 42 (insn3+) + CodeGen::Node insn0 = MakeInstruction(BPF_RET + BPF_K, 42); + CodeGen::Node insn1 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, insn0); + CodeGen::Node insn2 = insn1; // Implicit JUMP + + // We explicitly duplicate instructions to test that they're merged. + CodeGen::Node insn3 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, + MakeInstruction(BPF_RET + BPF_K, 42)); + EXPECT_EQ(insn2, insn3); + + CodeGen::Node insn4 = + MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn2, insn3); + CodeGen::Node insn5 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 23, insn4); + + // Force a basic block that ends in neither a jump instruction nor a return + // instruction. It only contains "insn5". This exercises one of the less + // common code paths in the topo-sort algorithm. + // This also gives us a diamond-shaped pattern in our graph, which stresses + // another aspect of the topo-sort algorithm (namely, the ability to + // correctly count the incoming branches for subtrees that are not disjunct). + CodeGen::Node insn6 = + MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4); + + RunTest(insn6); +} + +TEST_F(ProgramTest, ConfusingTails) { + // This simple program demonstrates https://crbug.com/351103/ + // The two "LOAD 0" instructions are blocks of their own. MergeTails() could + // be tempted to merge them since they are the same. However, they are + // not mergeable because they fall-through to non semantically equivalent + // blocks. + // Without the fix for this bug, this program should trigger the check in + // CompileAndCompare: the serialized graphs from the program and its compiled + // version will differ. + // + // 0) LOAD 1 // ??? + // 1) if A == 0x1; then JMP 2 else JMP 3 + // 2) LOAD 0 // System call number + // 3) if A == 0x2; then JMP 4 else JMP 5 + // 4) LOAD 0 // System call number + // 5) if A == 0x1; then JMP 6 else JMP 7 + // 6) RET 0 + // 7) RET 1 + + CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1); + CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0); + CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); + CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); + CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); + CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); + CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); + CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); + + RunTest(i0); +} + +TEST_F(ProgramTest, ConfusingTailsBasic) { + // Without the fix for https://crbug.com/351103/, (see + // SampleProgramConfusingTails()), this would generate a cyclic graph and + // crash as the two "LOAD 0" instructions would get merged. + // + // 0) LOAD 1 // ??? + // 1) if A == 0x1; then JMP 2 else JMP 3 + // 2) LOAD 0 // System call number + // 3) if A == 0x2; then JMP 4 else JMP 5 + // 4) LOAD 0 // System call number + // 5) RET 1 + + CodeGen::Node i5 = MakeInstruction(BPF_RET + BPF_K, 1); + CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); + CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); + CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); + CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); + CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); + + RunTest(i0); +} + +TEST_F(ProgramTest, ConfusingTailsMergeable) { + // This is similar to SampleProgramConfusingTails(), except that + // instructions 2 and 4 are now RET instructions. + // In PointerCompare(), this exercises the path where two blocks are of the + // same length and identical and the last instruction is a JMP or RET, so the + // following blocks don't need to be looked at and the blocks are mergeable. + // + // 0) LOAD 1 // ??? + // 1) if A == 0x1; then JMP 2 else JMP 3 + // 2) RET 42 + // 3) if A == 0x2; then JMP 4 else JMP 5 + // 4) RET 42 + // 5) if A == 0x1; then JMP 6 else JMP 7 + // 6) RET 0 + // 7) RET 1 + + CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1); + CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0); + CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); + CodeGen::Node i4 = MakeInstruction(BPF_RET + BPF_K, 42); + CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); + CodeGen::Node i2 = MakeInstruction(BPF_RET + BPF_K, 42); + CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); + CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); + + RunTest(i0); +} + +TEST_F(ProgramTest, InstructionFolding) { + // Check that simple instructions are folded as expected. + CodeGen::Node a = MakeInstruction(BPF_RET + BPF_K, 0); + EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0)); + CodeGen::Node b = MakeInstruction(BPF_RET + BPF_K, 1); + EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0)); + EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1)); + EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1)); + + // Check that complex sequences are folded too. + CodeGen::Node c = + MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, + MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b)); + EXPECT_EQ(c, MakeInstruction( + BPF_LD + BPF_W + BPF_ABS, 0, + MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b))); + + RunTest(c); +} + +TEST_F(ProgramTest, FarBranches) { + // BPF instructions use 8-bit fields for branch offsets, which means + // branch targets must be within 255 instructions of the branch + // instruction. CodeGen abstracts away this detail by inserting jump + // instructions as needed, which we test here by generating programs + // that should trigger any interesting boundary conditions. + + // Populate with 260 initial instruction nodes. + std::vector<CodeGen::Node> nodes; + nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0)); + for (size_t i = 1; i < 260; ++i) { + nodes.push_back( + MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back())); + } + + // Exhaustively test branch offsets near BPF's limits. + for (size_t jt = 250; jt < 260; ++jt) { + for (size_t jf = 250; jf < 260; ++jf) { + nodes.push_back(MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, + nodes.rbegin()[jt], nodes.rbegin()[jf])); + RunTest(nodes.back()); + } + } +} + +TEST_F(ProgramTest, JumpReuse) { + // As a code size optimization, we try to reuse jumps when possible + // instead of emitting new ones. Here we make sure that optimization + // is working as intended. + // + // NOTE: To simplify testing, we rely on implementation details + // about what CodeGen::Node values indicate (i.e., vector indices), + // but CodeGen users should treat them as opaque values. + + // Populate with 260 initial instruction nodes. + std::vector<CodeGen::Node> nodes; + nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0)); + for (size_t i = 1; i < 260; ++i) { + nodes.push_back( + MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back())); + } + + // Branching to nodes[0] and nodes[1] should require 3 new + // instructions: two far jumps plus the branch itself. + CodeGen::Node one = + MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, nodes[0], nodes[1]); + EXPECT_EQ(nodes.back() + 3, one); // XXX: Implementation detail! + RunTest(one); + + // Branching again to the same target nodes should require only one + // new instruction, as we can reuse the previous branch's jumps. + CodeGen::Node two = + MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, nodes[0], nodes[1]); + EXPECT_EQ(one + 1, two); // XXX: Implementation detail! + RunTest(two); +} + +} // namespace +} // namespace sandbox diff --git a/sandbox/linux/bpf_dsl/dump_bpf.cc b/sandbox/linux/bpf_dsl/dump_bpf.cc new file mode 100644 index 0000000000..d0c8f75073 --- /dev/null +++ b/sandbox/linux/bpf_dsl/dump_bpf.cc @@ -0,0 +1,109 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/bpf_dsl/dump_bpf.h" + +#include <stdio.h> + +#include "sandbox/linux/bpf_dsl/codegen.h" +#include "sandbox/linux/bpf_dsl/trap_registry.h" +#include "sandbox/linux/system_headers/linux_filter.h" +#include "sandbox/linux/system_headers/linux_seccomp.h" + +namespace sandbox { +namespace bpf_dsl { + +void DumpBPF::PrintProgram(const CodeGen::Program& program) { + for (CodeGen::Program::const_iterator iter = program.begin(); + iter != program.end(); + ++iter) { + int ip = (int)(iter - program.begin()); + fprintf(stderr, "%3d) ", ip); + switch (BPF_CLASS(iter->code)) { + case BPF_LD: + if (iter->code == BPF_LD + BPF_W + BPF_ABS) { + fprintf(stderr, "LOAD %d // ", (int)iter->k); + if (iter->k == offsetof(struct arch_seccomp_data, nr)) { + fprintf(stderr, "System call number\n"); + } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) { + fprintf(stderr, "Architecture\n"); + } else if (iter->k == + offsetof(struct arch_seccomp_data, instruction_pointer)) { + fprintf(stderr, "Instruction pointer (LSB)\n"); + } else if (iter->k == + offsetof(struct arch_seccomp_data, instruction_pointer) + + 4) { + fprintf(stderr, "Instruction pointer (MSB)\n"); + } else if (iter->k >= offsetof(struct arch_seccomp_data, args) && + iter->k < offsetof(struct arch_seccomp_data, args) + 48 && + (iter->k - offsetof(struct arch_seccomp_data, args)) % 4 == + 0) { + fprintf( + stderr, + "Argument %d (%cSB)\n", + (int)(iter->k - offsetof(struct arch_seccomp_data, args)) / 8, + (iter->k - offsetof(struct arch_seccomp_data, args)) % 8 ? 'M' + : 'L'); + } else { + fprintf(stderr, "???\n"); + } + } else { + fprintf(stderr, "LOAD ???\n"); + } + break; + case BPF_JMP: + if (BPF_OP(iter->code) == BPF_JA) { + fprintf(stderr, "JMP %d\n", ip + iter->k + 1); + } else { + fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n", + BPF_OP(iter->code) == BPF_JSET ? "&" : + BPF_OP(iter->code) == BPF_JEQ ? "==" : + BPF_OP(iter->code) == BPF_JGE ? ">=" : + BPF_OP(iter->code) == BPF_JGT ? ">" : "???", + (int)iter->k, + ip + iter->jt + 1, ip + iter->jf + 1); + } + break; + case BPF_RET: + fprintf(stderr, "RET 0x%x // ", iter->k); + if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) { + fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA); + } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { + fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA); + } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRACE) { + fprintf(stderr, "Trace #%d\n", iter->k & SECCOMP_RET_DATA); + } else if (iter->k == SECCOMP_RET_ALLOW) { + fprintf(stderr, "Allowed\n"); + } else { + fprintf(stderr, "???\n"); + } + break; + case BPF_ALU: + if (BPF_OP(iter->code) == BPF_NEG) { + fprintf(stderr, "A := -A\n"); + } else { + fprintf(stderr, "A := A %s 0x%x\n", + BPF_OP(iter->code) == BPF_ADD ? "+" : + BPF_OP(iter->code) == BPF_SUB ? "-" : + BPF_OP(iter->code) == BPF_MUL ? "*" : + BPF_OP(iter->code) == BPF_DIV ? "/" : + BPF_OP(iter->code) == BPF_MOD ? "%" : + BPF_OP(iter->code) == BPF_OR ? "|" : + BPF_OP(iter->code) == BPF_XOR ? "^" : + BPF_OP(iter->code) == BPF_AND ? "&" : + BPF_OP(iter->code) == BPF_LSH ? "<<" : + BPF_OP(iter->code) == BPF_RSH ? ">>" : "???", + (int)iter->k); + } + break; + default: + fprintf(stderr, "???\n"); + break; + } + } + return; +} + +} // namespace bpf_dsl +} // namespace sandbox diff --git a/sandbox/linux/bpf_dsl/dump_bpf.h b/sandbox/linux/bpf_dsl/dump_bpf.h new file mode 100644 index 0000000000..cd12be793d --- /dev/null +++ b/sandbox/linux/bpf_dsl/dump_bpf.h @@ -0,0 +1,18 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/bpf_dsl/codegen.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { +namespace bpf_dsl { + +class SANDBOX_EXPORT DumpBPF { + public: + // PrintProgram writes |program| in a human-readable format to stderr. + static void PrintProgram(const CodeGen::Program& program); +}; + +} // namespace bpf_dsl +} // namespace sandbox diff --git a/sandbox/linux/bpf_dsl/policy.cc b/sandbox/linux/bpf_dsl/policy.cc new file mode 100644 index 0000000000..c20edc6da8 --- /dev/null +++ b/sandbox/linux/bpf_dsl/policy.cc @@ -0,0 +1,19 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/bpf_dsl/policy.h" + +#include <errno.h> + +#include "sandbox/linux/bpf_dsl/bpf_dsl.h" + +namespace sandbox { +namespace bpf_dsl { + +ResultExpr Policy::InvalidSyscall() const { + return Error(ENOSYS); +} + +} // namespace bpf_dsl +} // namespace sandbox diff --git a/sandbox/linux/bpf_dsl/policy.h b/sandbox/linux/bpf_dsl/policy.h new file mode 100644 index 0000000000..6c67589456 --- /dev/null +++ b/sandbox/linux/bpf_dsl/policy.h @@ -0,0 +1,37 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_BPF_DSL_POLICY_H_ +#define SANDBOX_LINUX_BPF_DSL_POLICY_H_ + +#include "base/macros.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl_forward.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { +namespace bpf_dsl { + +// Interface to implement to define a BPF sandbox policy. +class SANDBOX_EXPORT Policy { + public: + Policy() {} + virtual ~Policy() {} + + // User extension point for writing custom sandbox policies. + // The returned ResultExpr will control how the kernel responds to the + // specified system call number. + virtual ResultExpr EvaluateSyscall(int sysno) const = 0; + + // Optional overload for specifying alternate behavior for invalid + // system calls. The default is to return ENOSYS. + virtual ResultExpr InvalidSyscall() const; + + private: + DISALLOW_COPY_AND_ASSIGN(Policy); +}; + +} // namespace bpf_dsl +} // namespace sandbox + +#endif // SANDBOX_LINUX_BPF_DSL_POLICY_H_ diff --git a/sandbox/linux/bpf_dsl/policy_compiler.cc b/sandbox/linux/bpf_dsl/policy_compiler.cc new file mode 100644 index 0000000000..f38232f85f --- /dev/null +++ b/sandbox/linux/bpf_dsl/policy_compiler.cc @@ -0,0 +1,499 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/bpf_dsl/policy_compiler.h" + +#include <errno.h> +#include <sys/syscall.h> + +#include <limits> + +#include "base/logging.h" +#include "base/macros.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl_impl.h" +#include "sandbox/linux/bpf_dsl/codegen.h" +#include "sandbox/linux/bpf_dsl/dump_bpf.h" +#include "sandbox/linux/bpf_dsl/policy.h" +#include "sandbox/linux/bpf_dsl/seccomp_macros.h" +#include "sandbox/linux/bpf_dsl/syscall_set.h" +#include "sandbox/linux/bpf_dsl/verifier.h" +#include "sandbox/linux/seccomp-bpf/errorcode.h" +#include "sandbox/linux/system_headers/linux_filter.h" +#include "sandbox/linux/system_headers/linux_seccomp.h" +#include "sandbox/linux/system_headers/linux_syscalls.h" + +namespace sandbox { +namespace bpf_dsl { + +namespace { + +#if defined(__i386__) || defined(__x86_64__) +const bool kIsIntel = true; +#else +const bool kIsIntel = false; +#endif +#if defined(__x86_64__) && defined(__ILP32__) +const bool kIsX32 = true; +#else +const bool kIsX32 = false; +#endif + +const int kSyscallsRequiredForUnsafeTraps[] = { + __NR_rt_sigprocmask, + __NR_rt_sigreturn, +#if defined(__NR_sigprocmask) + __NR_sigprocmask, +#endif +#if defined(__NR_sigreturn) + __NR_sigreturn, +#endif +}; + +bool HasExactlyOneBit(uint64_t x) { + // Common trick; e.g., see http://stackoverflow.com/a/108329. + return x != 0 && (x & (x - 1)) == 0; +} + +// A Trap() handler that returns an "errno" value. The value is encoded +// in the "aux" parameter. +intptr_t ReturnErrno(const struct arch_seccomp_data&, void* aux) { + // TrapFnc functions report error by following the native kernel convention + // of returning an exit code in the range of -1..-4096. They do not try to + // set errno themselves. The glibc wrapper that triggered the SIGSYS will + // ultimately do so for us. + int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA; + return -err; +} + +bool HasUnsafeTraps(const Policy* policy) { + DCHECK(policy); + for (uint32_t sysnum : SyscallSet::ValidOnly()) { + if (policy->EvaluateSyscall(sysnum)->HasUnsafeTraps()) { + return true; + } + } + return policy->InvalidSyscall()->HasUnsafeTraps(); +} + +} // namespace + +struct PolicyCompiler::Range { + uint32_t from; + CodeGen::Node node; +}; + +PolicyCompiler::PolicyCompiler(const Policy* policy, TrapRegistry* registry) + : policy_(policy), + registry_(registry), + escapepc_(0), + conds_(), + gen_(), + has_unsafe_traps_(HasUnsafeTraps(policy_)) { + DCHECK(policy); +} + +PolicyCompiler::~PolicyCompiler() { +} + +scoped_ptr<CodeGen::Program> PolicyCompiler::Compile(bool verify) { + CHECK(policy_->InvalidSyscall()->IsDeny()) + << "Policies should deny invalid system calls"; + + // If our BPF program has unsafe traps, enable support for them. + if (has_unsafe_traps_) { + CHECK_NE(0U, escapepc_) << "UnsafeTrap() requires a valid escape PC"; + + for (int sysnum : kSyscallsRequiredForUnsafeTraps) { + CHECK(policy_->EvaluateSyscall(sysnum)->IsAllow()) + << "Policies that use UnsafeTrap() must unconditionally allow all " + "required system calls"; + } + + CHECK(registry_->EnableUnsafeTraps()) + << "We'd rather die than enable unsafe traps"; + } + + // Assemble the BPF filter program. + scoped_ptr<CodeGen::Program> program(new CodeGen::Program()); + gen_.Compile(AssemblePolicy(), program.get()); + + // Make sure compilation resulted in a BPF program that executes + // correctly. Otherwise, there is an internal error in our BPF compiler. + // There is really nothing the caller can do until the bug is fixed. + if (verify) { + const char* err = nullptr; + if (!Verifier::VerifyBPF(this, *program, *policy_, &err)) { + DumpBPF::PrintProgram(*program); + LOG(FATAL) << err; + } + } + + return program.Pass(); +} + +void PolicyCompiler::DangerousSetEscapePC(uint64_t escapepc) { + escapepc_ = escapepc; +} + +CodeGen::Node PolicyCompiler::AssemblePolicy() { + // A compiled policy consists of three logical parts: + // 1. Check that the "arch" field matches the expected architecture. + // 2. If the policy involves unsafe traps, check if the syscall was + // invoked by Syscall::Call, and then allow it unconditionally. + // 3. Check the system call number and jump to the appropriate compiled + // system call policy number. + return CheckArch(MaybeAddEscapeHatch(DispatchSyscall())); +} + +CodeGen::Node PolicyCompiler::CheckArch(CodeGen::Node passed) { + // If the architecture doesn't match SECCOMP_ARCH, disallow the + // system call. + return gen_.MakeInstruction( + BPF_LD + BPF_W + BPF_ABS, SECCOMP_ARCH_IDX, + gen_.MakeInstruction( + BPF_JMP + BPF_JEQ + BPF_K, SECCOMP_ARCH, passed, + CompileResult(Kill("Invalid audit architecture in BPF filter")))); +} + +CodeGen::Node PolicyCompiler::MaybeAddEscapeHatch(CodeGen::Node rest) { + // If no unsafe traps, then simply return |rest|. + if (!has_unsafe_traps_) { + return rest; + } + + // We already enabled unsafe traps in Compile, but enable them again to give + // the trap registry a second chance to complain before we add the backdoor. + CHECK(registry_->EnableUnsafeTraps()); + + // Allow system calls, if they originate from our magic return address. + const uint32_t lopc = static_cast<uint32_t>(escapepc_); + const uint32_t hipc = static_cast<uint32_t>(escapepc_ >> 32); + + // BPF cannot do native 64-bit comparisons, so we have to compare + // both 32-bit halves of the instruction pointer. If they match what + // we expect, we return ERR_ALLOWED. If either or both don't match, + // we continue evalutating the rest of the sandbox policy. + // + // For simplicity, we check the full 64-bit instruction pointer even + // on 32-bit architectures. + return gen_.MakeInstruction( + BPF_LD + BPF_W + BPF_ABS, SECCOMP_IP_LSB_IDX, + gen_.MakeInstruction( + BPF_JMP + BPF_JEQ + BPF_K, lopc, + gen_.MakeInstruction( + BPF_LD + BPF_W + BPF_ABS, SECCOMP_IP_MSB_IDX, + gen_.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, hipc, + CompileResult(Allow()), rest)), + rest)); +} + +CodeGen::Node PolicyCompiler::DispatchSyscall() { + // Evaluate all possible system calls and group their ErrorCodes into + // ranges of identical codes. + Ranges ranges; + FindRanges(&ranges); + + // Compile the system call ranges to an optimized BPF jumptable + CodeGen::Node jumptable = AssembleJumpTable(ranges.begin(), ranges.end()); + + // Grab the system call number, so that we can check it and then + // execute the jump table. + return gen_.MakeInstruction( + BPF_LD + BPF_W + BPF_ABS, SECCOMP_NR_IDX, CheckSyscallNumber(jumptable)); +} + +CodeGen::Node PolicyCompiler::CheckSyscallNumber(CodeGen::Node passed) { + if (kIsIntel) { + // On Intel architectures, verify that system call numbers are in the + // expected number range. + CodeGen::Node invalidX32 = + CompileResult(Kill("Illegal mixing of system call ABIs")); + if (kIsX32) { + // The newer x32 API always sets bit 30. + return gen_.MakeInstruction( + BPF_JMP + BPF_JSET + BPF_K, 0x40000000, passed, invalidX32); + } else { + // The older i386 and x86-64 APIs clear bit 30 on all system calls. + return gen_.MakeInstruction( + BPF_JMP + BPF_JSET + BPF_K, 0x40000000, invalidX32, passed); + } + } + + // TODO(mdempsky): Similar validation for other architectures? + return passed; +} + +void PolicyCompiler::FindRanges(Ranges* ranges) { + // Please note that "struct seccomp_data" defines system calls as a signed + // int32_t, but BPF instructions always operate on unsigned quantities. We + // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL, + // and then verifying that the rest of the number range (both positive and + // negative) all return the same ErrorCode. + const CodeGen::Node invalid_node = CompileResult(policy_->InvalidSyscall()); + uint32_t old_sysnum = 0; + CodeGen::Node old_node = + SyscallSet::IsValid(old_sysnum) + ? CompileResult(policy_->EvaluateSyscall(old_sysnum)) + : invalid_node; + + for (uint32_t sysnum : SyscallSet::All()) { + CodeGen::Node node = + SyscallSet::IsValid(sysnum) + ? CompileResult(policy_->EvaluateSyscall(static_cast<int>(sysnum))) + : invalid_node; + // N.B., here we rely on CodeGen folding (i.e., returning the same + // node value for) identical code sequences, otherwise our jump + // table will blow up in size. + if (node != old_node) { + ranges->push_back(Range{old_sysnum, old_node}); + old_sysnum = sysnum; + old_node = node; + } + } + ranges->push_back(Range{old_sysnum, old_node}); +} + +CodeGen::Node PolicyCompiler::AssembleJumpTable(Ranges::const_iterator start, + Ranges::const_iterator stop) { + // We convert the list of system call ranges into jump table that performs + // a binary search over the ranges. + // As a sanity check, we need to have at least one distinct ranges for us + // to be able to build a jump table. + CHECK(start < stop) << "Invalid iterator range"; + const auto n = stop - start; + if (n == 1) { + // If we have narrowed things down to a single range object, we can + // return from the BPF filter program. + return start->node; + } + + // Pick the range object that is located at the mid point of our list. + // We compare our system call number against the lowest valid system call + // number in this range object. If our number is lower, it is outside of + // this range object. If it is greater or equal, it might be inside. + Ranges::const_iterator mid = start + n / 2; + + // Sub-divide the list of ranges and continue recursively. + CodeGen::Node jf = AssembleJumpTable(start, mid); + CodeGen::Node jt = AssembleJumpTable(mid, stop); + return gen_.MakeInstruction(BPF_JMP + BPF_JGE + BPF_K, mid->from, jt, jf); +} + +CodeGen::Node PolicyCompiler::CompileResult(const ResultExpr& res) { + return RetExpression(res->Compile(this)); +} + +CodeGen::Node PolicyCompiler::RetExpression(const ErrorCode& err) { + switch (err.error_type()) { + case ErrorCode::ET_COND: + return CondExpression(err); + case ErrorCode::ET_SIMPLE: + case ErrorCode::ET_TRAP: + return gen_.MakeInstruction(BPF_RET + BPF_K, err.err()); + default: + LOG(FATAL) + << "ErrorCode is not suitable for returning from a BPF program"; + return CodeGen::kNullNode; + } +} + +CodeGen::Node PolicyCompiler::CondExpression(const ErrorCode& cond) { + // Sanity check that |cond| makes sense. + CHECK(cond.argno_ >= 0 && cond.argno_ < 6) << "Invalid argument number " + << cond.argno_; + CHECK(cond.width_ == ErrorCode::TP_32BIT || + cond.width_ == ErrorCode::TP_64BIT) + << "Invalid argument width " << cond.width_; + CHECK_NE(0U, cond.mask_) << "Zero mask is invalid"; + CHECK_EQ(cond.value_, cond.value_ & cond.mask_) + << "Value contains masked out bits"; + if (sizeof(void*) == 4) { + CHECK_EQ(ErrorCode::TP_32BIT, cond.width_) + << "Invalid width on 32-bit platform"; + } + if (cond.width_ == ErrorCode::TP_32BIT) { + CHECK_EQ(0U, cond.mask_ >> 32) << "Mask exceeds argument size"; + CHECK_EQ(0U, cond.value_ >> 32) << "Value exceeds argument size"; + } + + CodeGen::Node passed = RetExpression(*cond.passed_); + CodeGen::Node failed = RetExpression(*cond.failed_); + + // We want to emit code to check "(arg & mask) == value" where arg, mask, and + // value are 64-bit values, but the BPF machine is only 32-bit. We implement + // this by independently testing the upper and lower 32-bits and continuing to + // |passed| if both evaluate true, or to |failed| if either evaluate false. + return CondExpressionHalf(cond, + UpperHalf, + CondExpressionHalf(cond, LowerHalf, passed, failed), + failed); +} + +CodeGen::Node PolicyCompiler::CondExpressionHalf(const ErrorCode& cond, + ArgHalf half, + CodeGen::Node passed, + CodeGen::Node failed) { + if (cond.width_ == ErrorCode::TP_32BIT && half == UpperHalf) { + // Special logic for sanity checking the upper 32-bits of 32-bit system + // call arguments. + + // TODO(mdempsky): Compile Unexpected64bitArgument() just per program. + CodeGen::Node invalid_64bit = RetExpression(Unexpected64bitArgument()); + + const uint32_t upper = SECCOMP_ARG_MSB_IDX(cond.argno_); + const uint32_t lower = SECCOMP_ARG_LSB_IDX(cond.argno_); + + if (sizeof(void*) == 4) { + // On 32-bit platforms, the upper 32-bits should always be 0: + // LDW [upper] + // JEQ 0, passed, invalid + return gen_.MakeInstruction( + BPF_LD + BPF_W + BPF_ABS, + upper, + gen_.MakeInstruction( + BPF_JMP + BPF_JEQ + BPF_K, 0, passed, invalid_64bit)); + } + + // On 64-bit platforms, the upper 32-bits may be 0 or ~0; but we only allow + // ~0 if the sign bit of the lower 32-bits is set too: + // LDW [upper] + // JEQ 0, passed, (next) + // JEQ ~0, (next), invalid + // LDW [lower] + // JSET (1<<31), passed, invalid + // + // TODO(mdempsky): The JSET instruction could perhaps jump to passed->next + // instead, as the first instruction of passed should be "LDW [lower]". + return gen_.MakeInstruction( + BPF_LD + BPF_W + BPF_ABS, + upper, + gen_.MakeInstruction( + BPF_JMP + BPF_JEQ + BPF_K, + 0, + passed, + gen_.MakeInstruction( + BPF_JMP + BPF_JEQ + BPF_K, + std::numeric_limits<uint32_t>::max(), + gen_.MakeInstruction( + BPF_LD + BPF_W + BPF_ABS, + lower, + gen_.MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, + 1U << 31, + passed, + invalid_64bit)), + invalid_64bit))); + } + + const uint32_t idx = (half == UpperHalf) ? SECCOMP_ARG_MSB_IDX(cond.argno_) + : SECCOMP_ARG_LSB_IDX(cond.argno_); + const uint32_t mask = (half == UpperHalf) ? cond.mask_ >> 32 : cond.mask_; + const uint32_t value = (half == UpperHalf) ? cond.value_ >> 32 : cond.value_; + + // Emit a suitable instruction sequence for (arg & mask) == value. + + // For (arg & 0) == 0, just return passed. + if (mask == 0) { + CHECK_EQ(0U, value); + return passed; + } + + // For (arg & ~0) == value, emit: + // LDW [idx] + // JEQ value, passed, failed + if (mask == std::numeric_limits<uint32_t>::max()) { + return gen_.MakeInstruction( + BPF_LD + BPF_W + BPF_ABS, + idx, + gen_.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, value, passed, failed)); + } + + // For (arg & mask) == 0, emit: + // LDW [idx] + // JSET mask, failed, passed + // (Note: failed and passed are intentionally swapped.) + if (value == 0) { + return gen_.MakeInstruction( + BPF_LD + BPF_W + BPF_ABS, + idx, + gen_.MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, mask, failed, passed)); + } + + // For (arg & x) == x where x is a single-bit value, emit: + // LDW [idx] + // JSET mask, passed, failed + if (mask == value && HasExactlyOneBit(mask)) { + return gen_.MakeInstruction( + BPF_LD + BPF_W + BPF_ABS, + idx, + gen_.MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, mask, passed, failed)); + } + + // Generic fallback: + // LDW [idx] + // AND mask + // JEQ value, passed, failed + return gen_.MakeInstruction( + BPF_LD + BPF_W + BPF_ABS, + idx, + gen_.MakeInstruction( + BPF_ALU + BPF_AND + BPF_K, + mask, + gen_.MakeInstruction( + BPF_JMP + BPF_JEQ + BPF_K, value, passed, failed))); +} + +ErrorCode PolicyCompiler::Unexpected64bitArgument() { + return Kill("Unexpected 64bit argument detected")->Compile(this); +} + +ErrorCode PolicyCompiler::Error(int err) { + if (has_unsafe_traps_) { + // When inside an UnsafeTrap() callback, we want to allow all system calls. + // This means, we must conditionally disable the sandbox -- and that's not + // something that kernel-side BPF filters can do, as they cannot inspect + // any state other than the syscall arguments. + // But if we redirect all error handlers to user-space, then we can easily + // make this decision. + // The performance penalty for this extra round-trip to user-space is not + // actually that bad, as we only ever pay it for denied system calls; and a + // typical program has very few of these. + return Trap(ReturnErrno, reinterpret_cast<void*>(err), true); + } + + return ErrorCode(err); +} + +ErrorCode PolicyCompiler::Trap(TrapRegistry::TrapFnc fnc, + const void* aux, + bool safe) { + uint16_t trap_id = registry_->Add(fnc, aux, safe); + return ErrorCode(trap_id, fnc, aux, safe); +} + +bool PolicyCompiler::IsRequiredForUnsafeTrap(int sysno) { + for (size_t i = 0; i < arraysize(kSyscallsRequiredForUnsafeTraps); ++i) { + if (sysno == kSyscallsRequiredForUnsafeTraps[i]) { + return true; + } + } + return false; +} + +ErrorCode PolicyCompiler::CondMaskedEqual(int argno, + ErrorCode::ArgType width, + uint64_t mask, + uint64_t value, + const ErrorCode& passed, + const ErrorCode& failed) { + return ErrorCode(argno, + width, + mask, + value, + &*conds_.insert(passed).first, + &*conds_.insert(failed).first); +} + +} // namespace bpf_dsl +} // namespace sandbox diff --git a/sandbox/linux/bpf_dsl/policy_compiler.h b/sandbox/linux/bpf_dsl/policy_compiler.h new file mode 100644 index 0000000000..df38d4ccbc --- /dev/null +++ b/sandbox/linux/bpf_dsl/policy_compiler.h @@ -0,0 +1,159 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_BPF_DSL_POLICY_COMPILER_H_ +#define SANDBOX_LINUX_BPF_DSL_POLICY_COMPILER_H_ + +#include <stdint.h> + +#include <map> +#include <set> +#include <vector> + +#include "base/macros.h" +#include "base/memory/scoped_ptr.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl_forward.h" +#include "sandbox/linux/bpf_dsl/codegen.h" +#include "sandbox/linux/seccomp-bpf/errorcode.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { +namespace bpf_dsl { +class Policy; + +// PolicyCompiler implements the bpf_dsl compiler, allowing users to +// transform bpf_dsl policies into BPF programs to be executed by the +// Linux kernel. +class SANDBOX_EXPORT PolicyCompiler { + public: + PolicyCompiler(const Policy* policy, TrapRegistry* registry); + ~PolicyCompiler(); + + // Compile registers any trap handlers needed by the policy and + // compiles the policy to a BPF program, which it returns. + scoped_ptr<CodeGen::Program> Compile(bool verify); + + // DangerousSetEscapePC sets the "escape PC" that is allowed to issue any + // system calls, regardless of policy. + void DangerousSetEscapePC(uint64_t escapepc); + + // Error returns an ErrorCode to indicate the system call should fail with + // the specified error number. + ErrorCode Error(int err); + + // Trap returns an ErrorCode to indicate the system call should + // instead invoke a trap handler. + ErrorCode Trap(TrapRegistry::TrapFnc fnc, const void* aux, bool safe); + + // UnsafeTraps require some syscalls to always be allowed. + // This helper function returns true for these calls. + static bool IsRequiredForUnsafeTrap(int sysno); + + // We can also use ErrorCode to request evaluation of a conditional + // statement based on inspection of system call parameters. + // This method wrap an ErrorCode object around the conditional statement. + // Argument "argno" (1..6) will be bitwise-AND'd with "mask" and compared + // to "value"; if equal, then "passed" will be returned, otherwise "failed". + // If "is32bit" is set, the argument must in the range of 0x0..(1u << 32 - 1) + // If it is outside this range, the sandbox treats the system call just + // the same as any other ABI violation (i.e. it aborts with an error + // message). + ErrorCode CondMaskedEqual(int argno, + ErrorCode::ArgType is_32bit, + uint64_t mask, + uint64_t value, + const ErrorCode& passed, + const ErrorCode& failed); + + // Returns the fatal ErrorCode that is used to indicate that somebody + // attempted to pass a 64bit value in a 32bit system call argument. + // This method is primarily needed for testing purposes. + ErrorCode Unexpected64bitArgument(); + + private: + struct Range; + typedef std::vector<Range> Ranges; + typedef std::set<ErrorCode, struct ErrorCode::LessThan> Conds; + + // Used by CondExpressionHalf to track which half of the argument it's + // emitting instructions for. + enum ArgHalf { + LowerHalf, + UpperHalf, + }; + + // Compile the configured policy into a complete instruction sequence. + CodeGen::Node AssemblePolicy(); + + // Return an instruction sequence that checks the + // arch_seccomp_data's "arch" field is valid, and then passes + // control to |passed| if so. + CodeGen::Node CheckArch(CodeGen::Node passed); + + // If |has_unsafe_traps_| is true, returns an instruction sequence + // that allows all system calls from |escapepc_|, and otherwise + // passes control to |rest|. Otherwise, simply returns |rest|. + CodeGen::Node MaybeAddEscapeHatch(CodeGen::Node rest); + + // Return an instruction sequence that loads and checks the system + // call number, performs a binary search, and then dispatches to an + // appropriate instruction sequence compiled from the current + // policy. + CodeGen::Node DispatchSyscall(); + + // Return an instruction sequence that checks the system call number + // (expected to be loaded in register A) and if valid, passes + // control to |passed| (with register A still valid). + CodeGen::Node CheckSyscallNumber(CodeGen::Node passed); + + // Finds all the ranges of system calls that need to be handled. Ranges are + // sorted in ascending order of system call numbers. There are no gaps in the + // ranges. System calls with identical ErrorCodes are coalesced into a single + // range. + void FindRanges(Ranges* ranges); + + // Returns a BPF program snippet that implements a jump table for the + // given range of system call numbers. This function runs recursively. + CodeGen::Node AssembleJumpTable(Ranges::const_iterator start, + Ranges::const_iterator stop); + + // CompileResult compiles an individual result expression into a + // CodeGen node. + CodeGen::Node CompileResult(const ResultExpr& res); + + // Returns a BPF program snippet that makes the BPF filter program exit + // with the given ErrorCode "err". N.B. the ErrorCode may very well be a + // conditional expression; if so, this function will recursively call + // CondExpression() and possibly RetExpression() to build a complex set of + // instructions. + CodeGen::Node RetExpression(const ErrorCode& err); + + // Returns a BPF program that evaluates the conditional expression in + // "cond" and returns the appropriate value from the BPF filter program. + // This function recursively calls RetExpression(); it should only ever be + // called from RetExpression(). + CodeGen::Node CondExpression(const ErrorCode& cond); + + // Returns a BPF program that evaluates half of a conditional expression; + // it should only ever be called from CondExpression(). + CodeGen::Node CondExpressionHalf(const ErrorCode& cond, + ArgHalf half, + CodeGen::Node passed, + CodeGen::Node failed); + + const Policy* policy_; + TrapRegistry* registry_; + uint64_t escapepc_; + + Conds conds_; + CodeGen gen_; + bool has_unsafe_traps_; + + DISALLOW_COPY_AND_ASSIGN(PolicyCompiler); +}; + +} // namespace bpf_dsl +} // namespace sandbox + +#endif // SANDBOX_LINUX_BPF_DSL_POLICY_COMPILER_H_ diff --git a/sandbox/linux/bpf_dsl/syscall_set.cc b/sandbox/linux/bpf_dsl/syscall_set.cc new file mode 100644 index 0000000000..47810e99ac --- /dev/null +++ b/sandbox/linux/bpf_dsl/syscall_set.cc @@ -0,0 +1,144 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/bpf_dsl/syscall_set.h" + +#include "base/logging.h" +#include "base/macros.h" +#include "sandbox/linux/bpf_dsl/linux_syscall_ranges.h" + +namespace sandbox { + +namespace { + +#if defined(__mips__) && (_MIPS_SIM == _MIPS_SIM_ABI32) +// This is true for Mips O32 ABI. +static_assert(MIN_SYSCALL == __NR_Linux, "min syscall number should be 4000"); +#else +// This true for supported architectures (Intel and ARM EABI). +static_assert(MIN_SYSCALL == 0u, + "min syscall should always be zero"); +#endif + +// SyscallRange represents an inclusive range of system call numbers. +struct SyscallRange { + uint32_t first; + uint32_t last; +}; + +const SyscallRange kValidSyscallRanges[] = { + // First we iterate up to MAX_PUBLIC_SYSCALL, which is equal to MAX_SYSCALL + // on Intel architectures, but leaves room for private syscalls on ARM. + {MIN_SYSCALL, MAX_PUBLIC_SYSCALL}, +#if defined(__arm__) + // ARM EABI includes "ARM private" system calls starting at + // MIN_PRIVATE_SYSCALL, and a "ghost syscall private to the kernel" at + // MIN_GHOST_SYSCALL. + {MIN_PRIVATE_SYSCALL, MAX_PRIVATE_SYSCALL}, + {MIN_GHOST_SYSCALL, MAX_SYSCALL}, +#endif +}; + +} // namespace + +SyscallSet::Iterator SyscallSet::begin() const { + return Iterator(set_, false); +} + +SyscallSet::Iterator SyscallSet::end() const { + return Iterator(set_, true); +} + +bool SyscallSet::IsValid(uint32_t num) { + for (const SyscallRange& range : kValidSyscallRanges) { + if (num >= range.first && num <= range.last) { + return true; + } + } + return false; +} + +bool operator==(const SyscallSet& lhs, const SyscallSet& rhs) { + return (lhs.set_ == rhs.set_); +} + +SyscallSet::Iterator::Iterator(Set set, bool done) + : set_(set), done_(done), num_(0) { + // If the set doesn't contain 0, we need to skip to the next element. + if (!done && set_ == (IsValid(num_) ? Set::INVALID_ONLY : Set::VALID_ONLY)) { + ++*this; + } +} + +uint32_t SyscallSet::Iterator::operator*() const { + DCHECK(!done_); + return num_; +} + +SyscallSet::Iterator& SyscallSet::Iterator::operator++() { + DCHECK(!done_); + + num_ = NextSyscall(); + if (num_ == 0) { + done_ = true; + } + + return *this; +} + +// NextSyscall returns the next system call in the iterated system +// call set after |num_|, or 0 if no such system call exists. +uint32_t SyscallSet::Iterator::NextSyscall() const { + const bool want_valid = (set_ != Set::INVALID_ONLY); + const bool want_invalid = (set_ != Set::VALID_ONLY); + + for (const SyscallRange& range : kValidSyscallRanges) { + if (want_invalid && range.first > 0 && num_ < range.first - 1) { + // Even when iterating invalid syscalls, we only include the end points; + // so skip directly to just before the next (valid) range. + return range.first - 1; + } + if (want_valid && num_ < range.first) { + return range.first; + } + if (want_valid && num_ < range.last) { + return num_ + 1; + } + if (want_invalid && num_ <= range.last) { + return range.last + 1; + } + } + + if (want_invalid) { + // BPF programs only ever operate on unsigned quantities. So, + // that's how we iterate; we return values from + // 0..0xFFFFFFFFu. But there are places, where the kernel might + // interpret system call numbers as signed quantities, so the + // boundaries between signed and unsigned values are potential + // problem cases. We want to explicitly return these values from + // our iterator. + if (num_ < 0x7FFFFFFFu) + return 0x7FFFFFFFu; + if (num_ < 0x80000000u) + return 0x80000000u; + + if (num_ < 0xFFFFFFFFu) + return 0xFFFFFFFFu; + } + + return 0; +} + +bool operator==(const SyscallSet::Iterator& lhs, + const SyscallSet::Iterator& rhs) { + DCHECK(lhs.set_ == rhs.set_); + return (lhs.done_ == rhs.done_) && (lhs.num_ == rhs.num_); +} + +bool operator!=(const SyscallSet::Iterator& lhs, + const SyscallSet::Iterator& rhs) { + return !(lhs == rhs); +} + +} // namespace sandbox diff --git a/sandbox/linux/bpf_dsl/syscall_set.h b/sandbox/linux/bpf_dsl/syscall_set.h new file mode 100644 index 0000000000..b9f076d932 --- /dev/null +++ b/sandbox/linux/bpf_dsl/syscall_set.h @@ -0,0 +1,103 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_BPF_DSL_SYSCALL_SET_H__ +#define SANDBOX_LINUX_BPF_DSL_SYSCALL_SET_H__ + +#include <stdint.h> + +#include <iterator> + +#include "base/macros.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { + +// Iterates over the entire system call range from 0..0xFFFFFFFFu. This +// iterator is aware of how system calls look like and will skip quickly +// over ranges that can't contain system calls. It iterates more slowly +// whenever it reaches a range that is potentially problematic, returning +// the last invalid value before a valid range of system calls, and the +// first invalid value after a valid range of syscalls. It iterates over +// individual values whenever it is in the normal range for system calls +// (typically MIN_SYSCALL..MAX_SYSCALL). +// +// Example usage: +// for (uint32_t sysnum : SyscallSet::All()) { +// // Do something with sysnum. +// } +class SANDBOX_EXPORT SyscallSet { + public: + class Iterator; + + SyscallSet(const SyscallSet& ss) : set_(ss.set_) {} + ~SyscallSet() {} + + Iterator begin() const; + Iterator end() const; + + // All returns a SyscallSet that contains both valid and invalid + // system call numbers. + static SyscallSet All() { return SyscallSet(Set::ALL); } + + // ValidOnly returns a SyscallSet that contains only valid system + // call numbers. + static SyscallSet ValidOnly() { return SyscallSet(Set::VALID_ONLY); } + + // InvalidOnly returns a SyscallSet that contains only invalid + // system call numbers, but still omits numbers in the middle of a + // range of invalid system call numbers. + static SyscallSet InvalidOnly() { return SyscallSet(Set::INVALID_ONLY); } + + // IsValid returns whether |num| specifies a valid system call + // number. + static bool IsValid(uint32_t num); + + private: + enum class Set { ALL, VALID_ONLY, INVALID_ONLY }; + + explicit SyscallSet(Set set) : set_(set) {} + + Set set_; + + friend bool operator==(const SyscallSet&, const SyscallSet&); + DISALLOW_ASSIGN(SyscallSet); +}; + +SANDBOX_EXPORT bool operator==(const SyscallSet& lhs, const SyscallSet& rhs); + +// Iterator provides C++ input iterator semantics for traversing a +// SyscallSet. +class SyscallSet::Iterator + : public std::iterator<std::input_iterator_tag, uint32_t> { + public: + Iterator(const Iterator& it) + : set_(it.set_), done_(it.done_), num_(it.num_) {} + ~Iterator() {} + + uint32_t operator*() const; + Iterator& operator++(); + + private: + Iterator(Set set, bool done); + + uint32_t NextSyscall() const; + + Set set_; + bool done_; + uint32_t num_; + + friend SyscallSet; + friend bool operator==(const Iterator&, const Iterator&); + DISALLOW_ASSIGN(Iterator); +}; + +SANDBOX_EXPORT bool operator==(const SyscallSet::Iterator& lhs, + const SyscallSet::Iterator& rhs); +SANDBOX_EXPORT bool operator!=(const SyscallSet::Iterator& lhs, + const SyscallSet::Iterator& rhs); + +} // namespace sandbox + +#endif // SANDBOX_LINUX_BPF_DSL_SYSCALL_SET_H__ diff --git a/sandbox/linux/bpf_dsl/syscall_set_unittest.cc b/sandbox/linux/bpf_dsl/syscall_set_unittest.cc new file mode 100644 index 0000000000..fafb6f6f73 --- /dev/null +++ b/sandbox/linux/bpf_dsl/syscall_set_unittest.cc @@ -0,0 +1,124 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/bpf_dsl/syscall_set.h" + +#include <stdint.h> + +#include "sandbox/linux/bpf_dsl/linux_syscall_ranges.h" +#include "sandbox/linux/tests/unit_tests.h" + +namespace sandbox { + +namespace { + +const SyscallSet kSyscallSets[] = { + SyscallSet::All(), + SyscallSet::InvalidOnly(), +}; + +SANDBOX_TEST(SyscallSet, Monotonous) { + for (const SyscallSet& set : kSyscallSets) { + uint32_t prev = 0; + bool have_prev = false; + for (uint32_t sysnum : set) { + if (have_prev) { + SANDBOX_ASSERT(sysnum > prev); + } else if (set == SyscallSet::All()) { + // The iterator should start at 0. + SANDBOX_ASSERT(sysnum == 0); + } + + prev = sysnum; + have_prev = true; + } + + // The iterator should always return 0xFFFFFFFFu as the last value. + SANDBOX_ASSERT(have_prev); + SANDBOX_ASSERT(prev == 0xFFFFFFFFu); + } +} + +// AssertRange checks that SyscallIterator produces all system call +// numbers in the inclusive range [min, max]. +void AssertRange(uint32_t min, uint32_t max) { + SANDBOX_ASSERT(min < max); + uint32_t prev = min - 1; + for (uint32_t sysnum : SyscallSet::All()) { + if (sysnum >= min && sysnum <= max) { + SANDBOX_ASSERT(prev == sysnum - 1); + prev = sysnum; + } + } + SANDBOX_ASSERT(prev == max); +} + +SANDBOX_TEST(SyscallSet, ValidSyscallRanges) { + AssertRange(MIN_SYSCALL, MAX_PUBLIC_SYSCALL); +#if defined(__arm__) + AssertRange(MIN_PRIVATE_SYSCALL, MAX_PRIVATE_SYSCALL); + AssertRange(MIN_GHOST_SYSCALL, MAX_SYSCALL); +#endif +} + +SANDBOX_TEST(SyscallSet, InvalidSyscalls) { + static const uint32_t kExpected[] = { +#if defined(__mips__) + 0, + MIN_SYSCALL - 1, +#endif + MAX_PUBLIC_SYSCALL + 1, +#if defined(__arm__) + MIN_PRIVATE_SYSCALL - 1, + MAX_PRIVATE_SYSCALL + 1, + MIN_GHOST_SYSCALL - 1, + MAX_SYSCALL + 1, +#endif + 0x7FFFFFFFu, + 0x80000000u, + 0xFFFFFFFFu, + }; + + for (const SyscallSet& set : kSyscallSets) { + size_t i = 0; + for (uint32_t sysnum : set) { + if (!SyscallSet::IsValid(sysnum)) { + SANDBOX_ASSERT(i < arraysize(kExpected)); + SANDBOX_ASSERT(kExpected[i] == sysnum); + ++i; + } + } + SANDBOX_ASSERT(i == arraysize(kExpected)); + } +} + +SANDBOX_TEST(SyscallSet, ValidOnlyIsOnlyValid) { + for (uint32_t sysnum : SyscallSet::ValidOnly()) { + SANDBOX_ASSERT(SyscallSet::IsValid(sysnum)); + } +} + +SANDBOX_TEST(SyscallSet, InvalidOnlyIsOnlyInvalid) { + for (uint32_t sysnum : SyscallSet::InvalidOnly()) { + SANDBOX_ASSERT(!SyscallSet::IsValid(sysnum)); + } +} + +SANDBOX_TEST(SyscallSet, AllIsValidOnlyPlusInvalidOnly) { + std::vector<uint32_t> merged; + const SyscallSet valid_only = SyscallSet::ValidOnly(); + const SyscallSet invalid_only = SyscallSet::InvalidOnly(); + std::merge(valid_only.begin(), + valid_only.end(), + invalid_only.begin(), + invalid_only.end(), + std::back_inserter(merged)); + + const SyscallSet all = SyscallSet::All(); + SANDBOX_ASSERT(merged == std::vector<uint32_t>(all.begin(), all.end())); +} + +} // namespace + +} // namespace sandbox diff --git a/sandbox/linux/bpf_dsl/verifier.cc b/sandbox/linux/bpf_dsl/verifier.cc new file mode 100644 index 0000000000..417c663e30 --- /dev/null +++ b/sandbox/linux/bpf_dsl/verifier.cc @@ -0,0 +1,396 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/bpf_dsl/verifier.h" + +#include <string.h> + +#include <limits> + +#include "sandbox/linux/bpf_dsl/bpf_dsl.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl_impl.h" +#include "sandbox/linux/bpf_dsl/policy.h" +#include "sandbox/linux/bpf_dsl/policy_compiler.h" +#include "sandbox/linux/bpf_dsl/seccomp_macros.h" +#include "sandbox/linux/bpf_dsl/syscall_set.h" +#include "sandbox/linux/seccomp-bpf/errorcode.h" +#include "sandbox/linux/system_headers/linux_filter.h" +#include "sandbox/linux/system_headers/linux_seccomp.h" + +namespace sandbox { +namespace bpf_dsl { + +namespace { + +const uint64_t kLower32Bits = std::numeric_limits<uint32_t>::max(); +const uint64_t kUpper32Bits = static_cast<uint64_t>(kLower32Bits) << 32; + +struct State { + State(const std::vector<struct sock_filter>& p, + const struct arch_seccomp_data& d) + : program(p), data(d), ip(0), accumulator(0), acc_is_valid(false) {} + const std::vector<struct sock_filter>& program; + const struct arch_seccomp_data& data; + unsigned int ip; + uint32_t accumulator; + bool acc_is_valid; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(State); +}; + +uint32_t EvaluateErrorCode(bpf_dsl::PolicyCompiler* compiler, + const ErrorCode& code, + const struct arch_seccomp_data& data) { + if (code.error_type() == ErrorCode::ET_SIMPLE || + code.error_type() == ErrorCode::ET_TRAP) { + return code.err(); + } else if (code.error_type() == ErrorCode::ET_COND) { + if (code.width() == ErrorCode::TP_32BIT && + (data.args[code.argno()] >> 32) && + (data.args[code.argno()] & 0xFFFFFFFF80000000ull) != + 0xFFFFFFFF80000000ull) { + return compiler->Unexpected64bitArgument().err(); + } + bool equal = (data.args[code.argno()] & code.mask()) == code.value(); + return EvaluateErrorCode(compiler, equal ? *code.passed() : *code.failed(), + data); + } else { + return SECCOMP_RET_INVALID; + } +} + +bool VerifyErrorCode(bpf_dsl::PolicyCompiler* compiler, + const std::vector<struct sock_filter>& program, + struct arch_seccomp_data* data, + const ErrorCode& root_code, + const ErrorCode& code, + const char** err) { + if (code.error_type() == ErrorCode::ET_SIMPLE || + code.error_type() == ErrorCode::ET_TRAP) { + const uint32_t computed_ret = Verifier::EvaluateBPF(program, *data, err); + if (*err) { + return false; + } + const uint32_t policy_ret = EvaluateErrorCode(compiler, root_code, *data); + if (computed_ret != policy_ret) { + // For efficiency's sake, we'd much rather compare "computed_ret" + // against "code.err()". This works most of the time, but it doesn't + // always work for nested conditional expressions. The test values + // that we generate on the fly to probe expressions can trigger + // code flow decisions in multiple nodes of the decision tree, and the + // only way to compute the correct error code in that situation is by + // calling EvaluateErrorCode(). + *err = "Exit code from BPF program doesn't match"; + return false; + } + } else if (code.error_type() == ErrorCode::ET_COND) { + if (code.argno() < 0 || code.argno() >= 6) { + *err = "Invalid argument number in error code"; + return false; + } + + // TODO(mdempsky): The test values generated here try to provide good + // coverage for generated BPF instructions while avoiding combinatorial + // explosion on large policies. Ideally we would instead take a fuzzing-like + // approach and generate a bounded number of test cases regardless of policy + // size. + + // Verify that we can check a value for simple equality. + data->args[code.argno()] = code.value(); + if (!VerifyErrorCode(compiler, program, data, root_code, *code.passed(), + err)) { + return false; + } + + // If mask ignores any bits, verify that setting those bits is still + // detected as equality. + uint64_t ignored_bits = ~code.mask(); + if (code.width() == ErrorCode::TP_32BIT) { + ignored_bits = static_cast<uint32_t>(ignored_bits); + } + if ((ignored_bits & kLower32Bits) != 0) { + data->args[code.argno()] = code.value() | (ignored_bits & kLower32Bits); + if (!VerifyErrorCode(compiler, program, data, root_code, *code.passed(), + err)) { + return false; + } + } + if ((ignored_bits & kUpper32Bits) != 0) { + data->args[code.argno()] = code.value() | (ignored_bits & kUpper32Bits); + if (!VerifyErrorCode(compiler, program, data, root_code, *code.passed(), + err)) { + return false; + } + } + + // Verify that changing bits included in the mask is detected as inequality. + if ((code.mask() & kLower32Bits) != 0) { + data->args[code.argno()] = code.value() ^ (code.mask() & kLower32Bits); + if (!VerifyErrorCode(compiler, program, data, root_code, *code.failed(), + err)) { + return false; + } + } + if ((code.mask() & kUpper32Bits) != 0) { + data->args[code.argno()] = code.value() ^ (code.mask() & kUpper32Bits); + if (!VerifyErrorCode(compiler, program, data, root_code, *code.failed(), + err)) { + return false; + } + } + + if (code.width() == ErrorCode::TP_32BIT) { + // For 32-bit system call arguments, we emit additional instructions to + // validate the upper 32-bits. Here we test that validation. + + // Arbitrary 64-bit values should be rejected. + data->args[code.argno()] = 1ULL << 32; + if (!VerifyErrorCode(compiler, program, data, root_code, + compiler->Unexpected64bitArgument(), err)) { + return false; + } + + // Upper 32-bits set without the MSB of the lower 32-bits set should be + // rejected too. + data->args[code.argno()] = kUpper32Bits; + if (!VerifyErrorCode(compiler, program, data, root_code, + compiler->Unexpected64bitArgument(), err)) { + return false; + } + } + } else { + *err = "Attempting to return invalid error code from BPF program"; + return false; + } + return true; +} + +void Ld(State* state, const struct sock_filter& insn, const char** err) { + if (BPF_SIZE(insn.code) != BPF_W || BPF_MODE(insn.code) != BPF_ABS || + insn.jt != 0 || insn.jf != 0) { + *err = "Invalid BPF_LD instruction"; + return; + } + if (insn.k < sizeof(struct arch_seccomp_data) && (insn.k & 3) == 0) { + // We only allow loading of properly aligned 32bit quantities. + memcpy(&state->accumulator, + reinterpret_cast<const char*>(&state->data) + insn.k, 4); + } else { + *err = "Invalid operand in BPF_LD instruction"; + return; + } + state->acc_is_valid = true; + return; +} + +void Jmp(State* state, const struct sock_filter& insn, const char** err) { + if (BPF_OP(insn.code) == BPF_JA) { + if (state->ip + insn.k + 1 >= state->program.size() || + state->ip + insn.k + 1 <= state->ip) { + compilation_failure: + *err = "Invalid BPF_JMP instruction"; + return; + } + state->ip += insn.k; + } else { + if (BPF_SRC(insn.code) != BPF_K || !state->acc_is_valid || + state->ip + insn.jt + 1 >= state->program.size() || + state->ip + insn.jf + 1 >= state->program.size()) { + goto compilation_failure; + } + switch (BPF_OP(insn.code)) { + case BPF_JEQ: + if (state->accumulator == insn.k) { + state->ip += insn.jt; + } else { + state->ip += insn.jf; + } + break; + case BPF_JGT: + if (state->accumulator > insn.k) { + state->ip += insn.jt; + } else { + state->ip += insn.jf; + } + break; + case BPF_JGE: + if (state->accumulator >= insn.k) { + state->ip += insn.jt; + } else { + state->ip += insn.jf; + } + break; + case BPF_JSET: + if (state->accumulator & insn.k) { + state->ip += insn.jt; + } else { + state->ip += insn.jf; + } + break; + default: + goto compilation_failure; + } + } +} + +uint32_t Ret(State*, const struct sock_filter& insn, const char** err) { + if (BPF_SRC(insn.code) != BPF_K) { + *err = "Invalid BPF_RET instruction"; + return 0; + } + return insn.k; +} + +void Alu(State* state, const struct sock_filter& insn, const char** err) { + if (BPF_OP(insn.code) == BPF_NEG) { + state->accumulator = -state->accumulator; + return; + } else { + if (BPF_SRC(insn.code) != BPF_K) { + *err = "Unexpected source operand in arithmetic operation"; + return; + } + switch (BPF_OP(insn.code)) { + case BPF_ADD: + state->accumulator += insn.k; + break; + case BPF_SUB: + state->accumulator -= insn.k; + break; + case BPF_MUL: + state->accumulator *= insn.k; + break; + case BPF_DIV: + if (!insn.k) { + *err = "Illegal division by zero"; + break; + } + state->accumulator /= insn.k; + break; + case BPF_MOD: + if (!insn.k) { + *err = "Illegal division by zero"; + break; + } + state->accumulator %= insn.k; + break; + case BPF_OR: + state->accumulator |= insn.k; + break; + case BPF_XOR: + state->accumulator ^= insn.k; + break; + case BPF_AND: + state->accumulator &= insn.k; + break; + case BPF_LSH: + if (insn.k > 32) { + *err = "Illegal shift operation"; + break; + } + state->accumulator <<= insn.k; + break; + case BPF_RSH: + if (insn.k > 32) { + *err = "Illegal shift operation"; + break; + } + state->accumulator >>= insn.k; + break; + default: + *err = "Invalid operator in arithmetic operation"; + break; + } + } +} + +} // namespace + +bool Verifier::VerifyBPF(bpf_dsl::PolicyCompiler* compiler, + const std::vector<struct sock_filter>& program, + const bpf_dsl::Policy& policy, + const char** err) { + *err = NULL; + for (uint32_t sysnum : SyscallSet::All()) { + // We ideally want to iterate over the full system call range and values + // just above and just below this range. This gives us the full result set + // of the "evaluators". + // On Intel systems, this can fail in a surprising way, as a cleared bit 30 + // indicates either i386 or x86-64; and a set bit 30 indicates x32. And + // unless we pay attention to setting this bit correctly, an early check in + // our BPF program will make us fail with a misleading error code. + struct arch_seccomp_data data = {static_cast<int>(sysnum), + static_cast<uint32_t>(SECCOMP_ARCH)}; +#if defined(__i386__) || defined(__x86_64__) +#if defined(__x86_64__) && defined(__ILP32__) + if (!(sysnum & 0x40000000u)) { + continue; + } +#else + if (sysnum & 0x40000000u) { + continue; + } +#endif +#endif + ErrorCode code = SyscallSet::IsValid(sysnum) + ? policy.EvaluateSyscall(sysnum)->Compile(compiler) + : policy.InvalidSyscall()->Compile(compiler); + if (!VerifyErrorCode(compiler, program, &data, code, code, err)) { + return false; + } + } + return true; +} + +uint32_t Verifier::EvaluateBPF(const std::vector<struct sock_filter>& program, + const struct arch_seccomp_data& data, + const char** err) { + *err = NULL; + if (program.size() < 1 || program.size() >= SECCOMP_MAX_PROGRAM_SIZE) { + *err = "Invalid program length"; + return 0; + } + for (State state(program, data); !*err; ++state.ip) { + if (state.ip >= program.size()) { + *err = "Invalid instruction pointer in BPF program"; + break; + } + const struct sock_filter& insn = program[state.ip]; + switch (BPF_CLASS(insn.code)) { + case BPF_LD: + Ld(&state, insn, err); + break; + case BPF_JMP: + Jmp(&state, insn, err); + break; + case BPF_RET: { + uint32_t r = Ret(&state, insn, err); + switch (r & SECCOMP_RET_ACTION) { + case SECCOMP_RET_TRAP: + case SECCOMP_RET_ERRNO: + case SECCOMP_RET_TRACE: + case SECCOMP_RET_ALLOW: + break; + case SECCOMP_RET_KILL: // We don't ever generate this + case SECCOMP_RET_INVALID: // Should never show up in BPF program + default: + *err = "Unexpected return code found in BPF program"; + return 0; + } + return r; + } + case BPF_ALU: + Alu(&state, insn, err); + break; + default: + *err = "Unexpected instruction in BPF program"; + break; + } + } + return 0; +} + +} // namespace bpf_dsl +} // namespace sandbox diff --git a/sandbox/linux/bpf_dsl/verifier.h b/sandbox/linux/bpf_dsl/verifier.h new file mode 100644 index 0000000000..b0435d1aa1 --- /dev/null +++ b/sandbox/linux/bpf_dsl/verifier.h @@ -0,0 +1,57 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_BPF_DSL_VERIFIER_H__ +#define SANDBOX_LINUX_BPF_DSL_VERIFIER_H__ + +#include <stdint.h> + +#include <vector> + +#include "base/macros.h" +#include "sandbox/sandbox_export.h" + +struct sock_filter; + +namespace sandbox { +struct arch_seccomp_data; + +namespace bpf_dsl { +class Policy; +class PolicyCompiler; + +class SANDBOX_EXPORT Verifier { + public: + // Evaluate the BPF program for all possible inputs and verify that it + // computes the correct result. We use the "evaluators" to determine + // the full set of possible inputs that we have to iterate over. + // Returns success, if the BPF filter accurately reflects the rules + // set by the "evaluators". + // Upon success, "err" is set to NULL. Upon failure, it contains a static + // error message that does not need to be free()'d. + static bool VerifyBPF(bpf_dsl::PolicyCompiler* compiler, + const std::vector<struct sock_filter>& program, + const bpf_dsl::Policy& policy, + const char** err); + + // Evaluate a given BPF program for a particular set of system call + // parameters. If evaluation failed for any reason, "err" will be set to + // a non-NULL error string. Otherwise, the BPF program's result will be + // returned by the function and "err" is NULL. + // We do not actually implement the full BPF state machine, but only the + // parts that can actually be generated by our BPF compiler. If this code + // is used for purposes other than verifying the output of the sandbox's + // BPF compiler, we might have to extend this BPF interpreter. + static uint32_t EvaluateBPF(const std::vector<struct sock_filter>& program, + const struct arch_seccomp_data& data, + const char** err); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Verifier); +}; + +} // namespace bpf_dsl +} // namespace sandbox + +#endif // SANDBOX_LINUX_BPF_DSL_VERIFIER_H__ diff --git a/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc b/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc new file mode 100644 index 0000000000..8c679a3d41 --- /dev/null +++ b/sandbox/linux/seccomp-bpf-helpers/baseline_policy.cc @@ -0,0 +1,270 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/seccomp-bpf-helpers/baseline_policy.h" + +#include <errno.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include "base/logging.h" +#include "build/build_config.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl.h" +#include "sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h" +#include "sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h" +#include "sandbox/linux/seccomp-bpf-helpers/syscall_sets.h" +#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" +#include "sandbox/linux/services/syscall_wrappers.h" +#include "sandbox/linux/system_headers/linux_syscalls.h" + +// Changing this implementation will have an effect on *all* policies. +// Currently this means: Renderer/Worker, GPU, Flash and NaCl. + +using sandbox::bpf_dsl::Allow; +using sandbox::bpf_dsl::Arg; +using sandbox::bpf_dsl::Error; +using sandbox::bpf_dsl::If; +using sandbox::bpf_dsl::ResultExpr; + +namespace sandbox { + +namespace { + +bool IsBaselinePolicyAllowed(int sysno) { + return SyscallSets::IsAllowedAddressSpaceAccess(sysno) || + SyscallSets::IsAllowedBasicScheduler(sysno) || + SyscallSets::IsAllowedEpoll(sysno) || + SyscallSets::IsAllowedFileSystemAccessViaFd(sysno) || + SyscallSets::IsAllowedFutex(sysno) || + SyscallSets::IsAllowedGeneralIo(sysno) || + SyscallSets::IsAllowedGetOrModifySocket(sysno) || + SyscallSets::IsAllowedGettime(sysno) || + SyscallSets::IsAllowedProcessStartOrDeath(sysno) || + SyscallSets::IsAllowedSignalHandling(sysno) || + SyscallSets::IsGetSimpleId(sysno) || + SyscallSets::IsKernelInternalApi(sysno) || +#if defined(__arm__) + SyscallSets::IsArmPrivate(sysno) || +#endif +#if defined(__mips__) + SyscallSets::IsMipsPrivate(sysno) || +#endif + SyscallSets::IsAllowedOperationOnFd(sysno); +} + +// System calls that will trigger the crashing SIGSYS handler. +bool IsBaselinePolicyWatched(int sysno) { + return SyscallSets::IsAdminOperation(sysno) || + SyscallSets::IsAdvancedScheduler(sysno) || + SyscallSets::IsAdvancedTimer(sysno) || + SyscallSets::IsAsyncIo(sysno) || + SyscallSets::IsDebug(sysno) || + SyscallSets::IsEventFd(sysno) || + SyscallSets::IsExtendedAttributes(sysno) || + SyscallSets::IsFaNotify(sysno) || + SyscallSets::IsFsControl(sysno) || + SyscallSets::IsGlobalFSViewChange(sysno) || + SyscallSets::IsGlobalProcessEnvironment(sysno) || + SyscallSets::IsGlobalSystemStatus(sysno) || + SyscallSets::IsInotify(sysno) || + SyscallSets::IsKernelModule(sysno) || + SyscallSets::IsKeyManagement(sysno) || + SyscallSets::IsKill(sysno) || + SyscallSets::IsMessageQueue(sysno) || + SyscallSets::IsMisc(sysno) || +#if defined(__x86_64__) + SyscallSets::IsNetworkSocketInformation(sysno) || +#endif + SyscallSets::IsNuma(sysno) || + SyscallSets::IsPrctl(sysno) || + SyscallSets::IsProcessGroupOrSession(sysno) || +#if defined(__i386__) || defined(__mips__) + SyscallSets::IsSocketCall(sysno) || +#endif +#if defined(__arm__) + SyscallSets::IsArmPciConfig(sysno) || +#endif +#if defined(__mips__) + SyscallSets::IsMipsMisc(sysno) || +#endif + SyscallSets::IsTimer(sysno); +} + +// |fs_denied_errno| is the errno return for denied filesystem access. +ResultExpr EvaluateSyscallImpl(int fs_denied_errno, + pid_t current_pid, + int sysno) { +#if defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || \ + defined(MEMORY_SANITIZER) + // TCGETS is required by the sanitizers on failure. + if (sysno == __NR_ioctl) { + return RestrictIoctl(); + } + + if (sysno == __NR_sched_getaffinity) { + return Allow(); + } + + // Used when RSS limiting is enabled in sanitizers. + if (sysno == __NR_getrusage) { + return RestrictGetrusage(); + } + + if (sysno == __NR_sigaltstack) { + // Required for better stack overflow detection in ASan. Disallowed in + // non-ASan builds. + return Allow(); + } +#endif // defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) || + // defined(MEMORY_SANITIZER) + + if (IsBaselinePolicyAllowed(sysno)) { + return Allow(); + } + +#if defined(OS_ANDROID) + // Needed for thread creation. + if (sysno == __NR_sigaltstack) + return Allow(); +#endif + + if (sysno == __NR_clock_gettime) { + return RestrictClockID(); + } + + if (sysno == __NR_clone) { + return RestrictCloneToThreadsAndEPERMFork(); + } + + if (sysno == __NR_fcntl) + return RestrictFcntlCommands(); + +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + if (sysno == __NR_fcntl64) + return RestrictFcntlCommands(); +#endif + +#if !defined(__aarch64__) + // fork() is never used as a system call (clone() is used instead), but we + // have seen it in fallback code on Android. + if (sysno == __NR_fork) { + return Error(EPERM); + } +#endif + + if (sysno == __NR_futex) + return RestrictFutex(); + + if (sysno == __NR_set_robust_list) + return Error(EPERM); + + if (sysno == __NR_getpriority || sysno ==__NR_setpriority) + return RestrictGetSetpriority(current_pid); + + if (sysno == __NR_madvise) { + // Only allow MADV_DONTNEED (aka MADV_FREE). + const Arg<int> advice(2); + return If(advice == MADV_DONTNEED, Allow()).Else(Error(EPERM)); + } + +#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ + defined(__aarch64__) + if (sysno == __NR_mmap) + return RestrictMmapFlags(); +#endif + +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + if (sysno == __NR_mmap2) + return RestrictMmapFlags(); +#endif + + if (sysno == __NR_mprotect) + return RestrictMprotectFlags(); + + if (sysno == __NR_prctl) + return RestrictPrctl(); + +#if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ + defined(__aarch64__) + if (sysno == __NR_socketpair) { + // Only allow AF_UNIX, PF_UNIX. Crash if anything else is seen. + static_assert(AF_UNIX == PF_UNIX, + "af_unix and pf_unix should not be different"); + const Arg<int> domain(0); + return If(domain == AF_UNIX, Allow()).Else(CrashSIGSYS()); + } +#endif + + if (SyscallSets::IsKill(sysno)) { + return RestrictKillTarget(current_pid, sysno); + } + + if (SyscallSets::IsFileSystem(sysno) || + SyscallSets::IsCurrentDirectory(sysno)) { + return Error(fs_denied_errno); + } + + if (SyscallSets::IsSeccomp(sysno)) + return Error(EPERM); + + if (SyscallSets::IsAnySystemV(sysno)) { + return Error(EPERM); + } + + if (SyscallSets::IsUmask(sysno) || + SyscallSets::IsDeniedFileSystemAccessViaFd(sysno) || + SyscallSets::IsDeniedGetOrModifySocket(sysno) || + SyscallSets::IsProcessPrivilegeChange(sysno)) { + return Error(EPERM); + } + +#if defined(__i386__) || defined(__mips__) + if (SyscallSets::IsSocketCall(sysno)) + return RestrictSocketcallCommand(); +#endif + + if (IsBaselinePolicyWatched(sysno)) { + // Previously unseen syscalls. TODO(jln): some of these should + // be denied gracefully right away. + return CrashSIGSYS(); + } + + // In any other case crash the program with our SIGSYS handler. + return CrashSIGSYS(); +} + +} // namespace. + +// Unfortunately C++03 doesn't allow delegated constructors. +// Call other constructor when C++11 lands. +BaselinePolicy::BaselinePolicy() : BaselinePolicy(EPERM) {} + +BaselinePolicy::BaselinePolicy(int fs_denied_errno) + : fs_denied_errno_(fs_denied_errno), policy_pid_(sys_getpid()) { +} + +BaselinePolicy::~BaselinePolicy() { + // Make sure that this policy is created, used and destroyed by a single + // process. + DCHECK_EQ(sys_getpid(), policy_pid_); +} + +ResultExpr BaselinePolicy::EvaluateSyscall(int sysno) const { + // Sanity check that we're only called with valid syscall numbers. + DCHECK(SandboxBPF::IsValidSyscallNumber(sysno)); + // Make sure that this policy is used in the creating process. + if (1 == sysno) { + DCHECK_EQ(sys_getpid(), policy_pid_); + } + return EvaluateSyscallImpl(fs_denied_errno_, policy_pid_, sysno); +} + +ResultExpr BaselinePolicy::InvalidSyscall() const { + return CrashSIGSYS(); +} + +} // namespace sandbox. diff --git a/sandbox/linux/seccomp-bpf-helpers/baseline_policy.h b/sandbox/linux/seccomp-bpf-helpers/baseline_policy.h new file mode 100644 index 0000000000..4169d9c3e2 --- /dev/null +++ b/sandbox/linux/seccomp-bpf-helpers/baseline_policy.h @@ -0,0 +1,48 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SECCOMP_BPF_HELPERS_BASELINE_POLICY_H_ +#define SANDBOX_LINUX_SECCOMP_BPF_HELPERS_BASELINE_POLICY_H_ + +#include "sandbox/linux/bpf_dsl/bpf_dsl_forward.h" +#include "sandbox/linux/bpf_dsl/policy.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { + +// This is a helper to build seccomp-bpf policies, i.e. policies for a sandbox +// that reduces the Linux kernel's attack surface. Given its nature, it doesn't +// have a clear semantics and is mostly "implementation-defined". +// +// This class implements the Policy interface with a "baseline" +// policy for use within Chromium. +// The "baseline" policy is somewhat arbitrary. All Chromium policies are an +// alteration of it, and it represents a reasonable common ground to run most +// code in a sandboxed environment. +// A baseline policy is only valid for the process for which this object was +// instantiated (so do not fork() and use it in a child). +class SANDBOX_EXPORT BaselinePolicy : public bpf_dsl::Policy { + public: + BaselinePolicy(); + // |fs_denied_errno| is the errno returned when a filesystem access system + // call is denied. + explicit BaselinePolicy(int fs_denied_errno); + ~BaselinePolicy() override; + + bpf_dsl::ResultExpr EvaluateSyscall(int system_call_number) const override; + bpf_dsl::ResultExpr InvalidSyscall() const override; + pid_t policy_pid() const { return policy_pid_; } + + private: + int fs_denied_errno_; + + // The PID that the policy applies to (should be equal to the current pid). + pid_t policy_pid_; + + DISALLOW_COPY_AND_ASSIGN(BaselinePolicy); +}; + +} // namespace sandbox. + +#endif // SANDBOX_LINUX_SECCOMP_BPF_HELPERS_BASELINE_POLICY_H_ diff --git a/sandbox/linux/seccomp-bpf-helpers/baseline_policy_unittest.cc b/sandbox/linux/seccomp-bpf-helpers/baseline_policy_unittest.cc new file mode 100644 index 0000000000..614849f61c --- /dev/null +++ b/sandbox/linux/seccomp-bpf-helpers/baseline_policy_unittest.cc @@ -0,0 +1,334 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/seccomp-bpf-helpers/baseline_policy.h" + +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <signal.h> +#include <string.h> +#include <sys/prctl.h> +#include <sys/resource.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include <unistd.h> + +#include "base/files/scoped_file.h" +#include "base/macros.h" +#include "base/posix/eintr_wrapper.h" +#include "base/threading/thread.h" +#include "build/build_config.h" +#include "sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h" +#include "sandbox/linux/seccomp-bpf/bpf_tests.h" +#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" +#include "sandbox/linux/seccomp-bpf/syscall.h" +#include "sandbox/linux/services/syscall_wrappers.h" +#include "sandbox/linux/services/thread_helpers.h" +#include "sandbox/linux/system_headers/linux_futex.h" +#include "sandbox/linux/system_headers/linux_syscalls.h" +#include "sandbox/linux/tests/test_utils.h" +#include "sandbox/linux/tests/unit_tests.h" + +namespace sandbox { + +namespace { + +// This also tests that read(), write() and fstat() are allowed. +void TestPipeOrSocketPair(base::ScopedFD read_end, base::ScopedFD write_end) { + BPF_ASSERT_LE(0, read_end.get()); + BPF_ASSERT_LE(0, write_end.get()); + struct stat stat_buf; + int sys_ret = fstat(read_end.get(), &stat_buf); + BPF_ASSERT_EQ(0, sys_ret); + BPF_ASSERT(S_ISFIFO(stat_buf.st_mode) || S_ISSOCK(stat_buf.st_mode)); + + const ssize_t kTestTransferSize = 4; + static const char kTestString[kTestTransferSize] = {'T', 'E', 'S', 'T'}; + ssize_t transfered = 0; + + transfered = + HANDLE_EINTR(write(write_end.get(), kTestString, kTestTransferSize)); + BPF_ASSERT_EQ(kTestTransferSize, transfered); + char read_buf[kTestTransferSize + 1] = {0}; + transfered = HANDLE_EINTR(read(read_end.get(), read_buf, sizeof(read_buf))); + BPF_ASSERT_EQ(kTestTransferSize, transfered); + BPF_ASSERT_EQ(0, memcmp(kTestString, read_buf, kTestTransferSize)); +} + +// Test that a few easy-to-test system calls are allowed. +BPF_TEST_C(BaselinePolicy, BaselinePolicyBasicAllowed, BaselinePolicy) { + BPF_ASSERT_EQ(0, sched_yield()); + + int pipefd[2]; + int sys_ret = pipe(pipefd); + BPF_ASSERT_EQ(0, sys_ret); + TestPipeOrSocketPair(base::ScopedFD(pipefd[0]), base::ScopedFD(pipefd[1])); + + BPF_ASSERT_LE(1, getpid()); + BPF_ASSERT_LE(0, getuid()); +} + +BPF_TEST_C(BaselinePolicy, FchmodErrno, BaselinePolicy) { + int ret = fchmod(-1, 07777); + BPF_ASSERT_EQ(-1, ret); + // Without the sandbox, this would EBADF instead. + BPF_ASSERT_EQ(EPERM, errno); +} + +BPF_TEST_C(BaselinePolicy, ForkErrno, BaselinePolicy) { + errno = 0; + pid_t pid = fork(); + const int fork_errno = errno; + TestUtils::HandlePostForkReturn(pid); + + BPF_ASSERT_EQ(-1, pid); + BPF_ASSERT_EQ(EPERM, fork_errno); +} + +pid_t ForkX86Glibc() { + static pid_t ptid; + return sys_clone(CLONE_PARENT_SETTID | SIGCHLD, nullptr, &ptid, nullptr, + nullptr); +} + +BPF_TEST_C(BaselinePolicy, ForkX86Eperm, BaselinePolicy) { + errno = 0; + pid_t pid = ForkX86Glibc(); + const int fork_errno = errno; + TestUtils::HandlePostForkReturn(pid); + + BPF_ASSERT_EQ(-1, pid); + BPF_ASSERT_EQ(EPERM, fork_errno); +} + +pid_t ForkARMGlibc() { + static pid_t ctid; + return sys_clone(CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | SIGCHLD, nullptr, + nullptr, &ctid, nullptr); +} + +BPF_TEST_C(BaselinePolicy, ForkArmEperm, BaselinePolicy) { + errno = 0; + pid_t pid = ForkARMGlibc(); + const int fork_errno = errno; + TestUtils::HandlePostForkReturn(pid); + + BPF_ASSERT_EQ(-1, pid); + BPF_ASSERT_EQ(EPERM, fork_errno); +} + +BPF_TEST_C(BaselinePolicy, CreateThread, BaselinePolicy) { + base::Thread thread("sandbox_tests"); + BPF_ASSERT(thread.Start()); +} + +BPF_DEATH_TEST_C(BaselinePolicy, + DisallowedCloneFlagCrashes, + DEATH_SEGV_MESSAGE(GetCloneErrorMessageContentForTests()), + BaselinePolicy) { + pid_t pid = sys_clone(CLONE_THREAD | SIGCHLD); + TestUtils::HandlePostForkReturn(pid); +} + +BPF_DEATH_TEST_C(BaselinePolicy, + DisallowedKillCrashes, + DEATH_SEGV_MESSAGE(GetKillErrorMessageContentForTests()), + BaselinePolicy) { + BPF_ASSERT_NE(1, getpid()); + kill(1, 0); + _exit(0); +} + +BPF_TEST_C(BaselinePolicy, CanKillSelf, BaselinePolicy) { + int sys_ret = kill(getpid(), 0); + BPF_ASSERT_EQ(0, sys_ret); +} + +BPF_TEST_C(BaselinePolicy, Socketpair, BaselinePolicy) { + int sv[2]; + int sys_ret = socketpair(AF_UNIX, SOCK_DGRAM, 0, sv); + BPF_ASSERT_EQ(0, sys_ret); + TestPipeOrSocketPair(base::ScopedFD(sv[0]), base::ScopedFD(sv[1])); + + sys_ret = socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv); + BPF_ASSERT_EQ(0, sys_ret); + TestPipeOrSocketPair(base::ScopedFD(sv[0]), base::ScopedFD(sv[1])); +} + +// Not all architectures can restrict the domain for socketpair(). +#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) +BPF_DEATH_TEST_C(BaselinePolicy, + SocketpairWrongDomain, + DEATH_SEGV_MESSAGE(GetErrorMessageContentForTests()), + BaselinePolicy) { + int sv[2]; + ignore_result(socketpair(AF_INET, SOCK_STREAM, 0, sv)); + _exit(1); +} +#endif // defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) + +BPF_TEST_C(BaselinePolicy, EPERM_open, BaselinePolicy) { + errno = 0; + int sys_ret = open("/proc/cpuinfo", O_RDONLY); + BPF_ASSERT_EQ(-1, sys_ret); + BPF_ASSERT_EQ(EPERM, errno); +} + +BPF_TEST_C(BaselinePolicy, EPERM_access, BaselinePolicy) { + errno = 0; + int sys_ret = access("/proc/cpuinfo", R_OK); + BPF_ASSERT_EQ(-1, sys_ret); + BPF_ASSERT_EQ(EPERM, errno); +} + +BPF_TEST_C(BaselinePolicy, EPERM_getcwd, BaselinePolicy) { + errno = 0; + char buf[1024]; + char* cwd = getcwd(buf, sizeof(buf)); + BPF_ASSERT_EQ(NULL, cwd); + BPF_ASSERT_EQ(EPERM, errno); +} + +BPF_DEATH_TEST_C(BaselinePolicy, + SIGSYS_InvalidSyscall, + DEATH_SEGV_MESSAGE(GetErrorMessageContentForTests()), + BaselinePolicy) { + Syscall::InvalidCall(); +} + +// A failing test using this macro could be problematic since we perform +// system calls by passing "0" as every argument. +// The kernel could SIGSEGV the process or the system call itself could reboot +// the machine. Some thoughts have been given when hand-picking the system +// calls below to limit any potential side effects outside of the current +// process. +#define TEST_BASELINE_SIGSYS(sysno) \ + BPF_DEATH_TEST_C(BaselinePolicy, \ + SIGSYS_##sysno, \ + DEATH_SEGV_MESSAGE(GetErrorMessageContentForTests()), \ + BaselinePolicy) { \ + syscall(sysno, 0, 0, 0, 0, 0, 0); \ + _exit(1); \ + } + +TEST_BASELINE_SIGSYS(__NR_acct); +TEST_BASELINE_SIGSYS(__NR_chroot); +TEST_BASELINE_SIGSYS(__NR_fanotify_init); +TEST_BASELINE_SIGSYS(__NR_fgetxattr); +TEST_BASELINE_SIGSYS(__NR_getcpu); +TEST_BASELINE_SIGSYS(__NR_getitimer); +TEST_BASELINE_SIGSYS(__NR_init_module); +TEST_BASELINE_SIGSYS(__NR_io_cancel); +TEST_BASELINE_SIGSYS(__NR_keyctl); +TEST_BASELINE_SIGSYS(__NR_mq_open); +TEST_BASELINE_SIGSYS(__NR_ptrace); +TEST_BASELINE_SIGSYS(__NR_sched_setaffinity); +TEST_BASELINE_SIGSYS(__NR_setpgid); +TEST_BASELINE_SIGSYS(__NR_swapon); +TEST_BASELINE_SIGSYS(__NR_sysinfo); +TEST_BASELINE_SIGSYS(__NR_syslog); +TEST_BASELINE_SIGSYS(__NR_timer_create); + +#if !defined(__aarch64__) +TEST_BASELINE_SIGSYS(__NR_eventfd); +TEST_BASELINE_SIGSYS(__NR_inotify_init); +TEST_BASELINE_SIGSYS(__NR_vserver); +#endif + +BPF_DEATH_TEST_C(BaselinePolicy, + FutexWithRequeuePriorityInheritence, + DEATH_SEGV_MESSAGE(GetFutexErrorMessageContentForTests()), + BaselinePolicy) { + syscall(__NR_futex, NULL, FUTEX_CMP_REQUEUE_PI, 0, NULL, NULL, 0); + _exit(1); +} + +BPF_DEATH_TEST_C(BaselinePolicy, + FutexWithRequeuePriorityInheritencePrivate, + DEATH_SEGV_MESSAGE(GetFutexErrorMessageContentForTests()), + BaselinePolicy) { + syscall(__NR_futex, NULL, FUTEX_CMP_REQUEUE_PI_PRIVATE, 0, NULL, NULL, 0); + _exit(1); +} + +BPF_DEATH_TEST_C(BaselinePolicy, + FutexWithUnlockPIPrivate, + DEATH_SEGV_MESSAGE(GetFutexErrorMessageContentForTests()), + BaselinePolicy) { + syscall(__NR_futex, NULL, FUTEX_UNLOCK_PI_PRIVATE, 0, NULL, NULL, 0); + _exit(1); +} + +BPF_TEST_C(BaselinePolicy, PrctlDumpable, BaselinePolicy) { + const int is_dumpable = prctl(PR_GET_DUMPABLE, 0, 0, 0, 0); + BPF_ASSERT(is_dumpable == 1 || is_dumpable == 0); + const int prctl_ret = prctl(PR_SET_DUMPABLE, is_dumpable, 0, 0, 0, 0); + BPF_ASSERT_EQ(0, prctl_ret); +} + +// Workaround incomplete Android headers. +#if !defined(PR_CAPBSET_READ) +#define PR_CAPBSET_READ 23 +#endif + +BPF_DEATH_TEST_C(BaselinePolicy, + PrctlSigsys, + DEATH_SEGV_MESSAGE(GetPrctlErrorMessageContentForTests()), + BaselinePolicy) { + prctl(PR_CAPBSET_READ, 0, 0, 0, 0); + _exit(1); +} + +BPF_TEST_C(BaselinePolicy, GetOrSetPriority, BaselinePolicy) { + errno = 0; + const int original_prio = getpriority(PRIO_PROCESS, 0); + // Check errno instead of the return value since this system call can return + // -1 as a valid value. + BPF_ASSERT_EQ(0, errno); + + errno = 0; + int rc = getpriority(PRIO_PROCESS, getpid()); + BPF_ASSERT_EQ(0, errno); + + rc = getpriority(PRIO_PROCESS, getpid() + 1); + BPF_ASSERT_EQ(-1, rc); + BPF_ASSERT_EQ(EPERM, errno); + + rc = setpriority(PRIO_PROCESS, 0, original_prio); + BPF_ASSERT_EQ(0, rc); + + rc = setpriority(PRIO_PROCESS, getpid(), original_prio); + BPF_ASSERT_EQ(0, rc); + + errno = 0; + rc = setpriority(PRIO_PROCESS, getpid() + 1, original_prio); + BPF_ASSERT_EQ(-1, rc); + BPF_ASSERT_EQ(EPERM, errno); +} + +BPF_DEATH_TEST_C(BaselinePolicy, + GetPrioritySigsys, + DEATH_SEGV_MESSAGE(GetErrorMessageContentForTests()), + BaselinePolicy) { + getpriority(PRIO_USER, 0); + _exit(1); +} + +BPF_DEATH_TEST_C(BaselinePolicy, + ClockGettimeWithDisallowedClockCrashes, + DEATH_SEGV_MESSAGE(sandbox::GetErrorMessageContentForTests()), + BaselinePolicy) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); +} + +} // namespace + +} // namespace sandbox diff --git a/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.cc b/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.cc new file mode 100644 index 0000000000..05250d147f --- /dev/null +++ b/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.cc @@ -0,0 +1,297 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Note: any code in this file MUST be async-signal safe. + +#include "sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h" + +#include <sys/syscall.h> +#include <unistd.h> + +#include "base/logging.h" +#include "base/posix/eintr_wrapper.h" +#include "build/build_config.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl.h" +#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" +#include "sandbox/linux/seccomp-bpf/syscall.h" +#include "sandbox/linux/services/syscall_wrappers.h" +#include "sandbox/linux/system_headers/linux_syscalls.h" + +#if defined(__mips__) +// __NR_Linux, is defined in <asm/unistd.h>. +#include <asm/unistd.h> +#endif + +#define SECCOMP_MESSAGE_COMMON_CONTENT "seccomp-bpf failure" +#define SECCOMP_MESSAGE_CLONE_CONTENT "clone() failure" +#define SECCOMP_MESSAGE_PRCTL_CONTENT "prctl() failure" +#define SECCOMP_MESSAGE_IOCTL_CONTENT "ioctl() failure" +#define SECCOMP_MESSAGE_KILL_CONTENT "(tg)kill() failure" +#define SECCOMP_MESSAGE_FUTEX_CONTENT "futex() failure" + +namespace { + +inline bool IsArchitectureX86_64() { +#if defined(__x86_64__) + return true; +#else + return false; +#endif +} + +// Write |error_message| to stderr. Similar to RawLog(), but a bit more careful +// about async-signal safety. |size| is the size to write and should typically +// not include a terminating \0. +void WriteToStdErr(const char* error_message, size_t size) { + while (size > 0) { + // TODO(jln): query the current policy to check if send() is available and + // use it to perform a non-blocking write. + const int ret = HANDLE_EINTR(write(STDERR_FILENO, error_message, size)); + // We can't handle any type of error here. + if (ret <= 0 || static_cast<size_t>(ret) > size) break; + size -= ret; + error_message += ret; + } +} + +// Invalid syscall values are truncated to zero. +// On architectures where base value is zero (Intel and Arm), +// syscall number is the same as offset from base. +// This function returns values between 0 and 1023 on all architectures. +// On architectures where base value is different than zero (currently only +// Mips), we are truncating valid syscall values to offset from base. +uint32_t SyscallNumberToOffsetFromBase(uint32_t sysno) { +#if defined(__mips__) + // On MIPS syscall numbers are in different range than on x86 and ARM. + // Valid MIPS O32 ABI syscall __NR_syscall will be truncated to zero for + // simplicity. + sysno = sysno - __NR_Linux; +#endif + + if (sysno >= 1024) + sysno = 0; + + return sysno; +} + +// Print a seccomp-bpf failure to handle |sysno| to stderr in an +// async-signal safe way. +void PrintSyscallError(uint32_t sysno) { + if (sysno >= 1024) + sysno = 0; + // TODO(markus): replace with async-signal safe snprintf when available. + const size_t kNumDigits = 4; + char sysno_base10[kNumDigits]; + uint32_t rem = sysno; + uint32_t mod = 0; + for (int i = kNumDigits - 1; i >= 0; i--) { + mod = rem % 10; + rem /= 10; + sysno_base10[i] = '0' + mod; + } +#if defined(__mips__) && (_MIPS_SIM == _MIPS_SIM_ABI32) + static const char kSeccompErrorPrefix[] = __FILE__ + ":**CRASHING**:" SECCOMP_MESSAGE_COMMON_CONTENT " in syscall 4000 + "; +#else + static const char kSeccompErrorPrefix[] = + __FILE__":**CRASHING**:" SECCOMP_MESSAGE_COMMON_CONTENT " in syscall "; +#endif + static const char kSeccompErrorPostfix[] = "\n"; + WriteToStdErr(kSeccompErrorPrefix, sizeof(kSeccompErrorPrefix) - 1); + WriteToStdErr(sysno_base10, sizeof(sysno_base10)); + WriteToStdErr(kSeccompErrorPostfix, sizeof(kSeccompErrorPostfix) - 1); +} + +} // namespace. + +namespace sandbox { + +intptr_t CrashSIGSYS_Handler(const struct arch_seccomp_data& args, void* aux) { + uint32_t syscall = SyscallNumberToOffsetFromBase(args.nr); + + PrintSyscallError(syscall); + + // Encode 8-bits of the 1st two arguments too, so we can discern which socket + // type, which fcntl, ... etc., without being likely to hit a mapped + // address. + // Do not encode more bits here without thinking about increasing the + // likelihood of collision with mapped pages. + syscall |= ((args.args[0] & 0xffUL) << 12); + syscall |= ((args.args[1] & 0xffUL) << 20); + // Purposefully dereference the syscall as an address so it'll show up very + // clearly and easily in crash dumps. + volatile char* addr = reinterpret_cast<volatile char*>(syscall); + *addr = '\0'; + // In case we hit a mapped address, hit the null page with just the syscall, + // for paranoia. + syscall &= 0xfffUL; + addr = reinterpret_cast<volatile char*>(syscall); + *addr = '\0'; + for (;;) + _exit(1); +} + +// TODO(jln): refactor the reporting functions. + +intptr_t SIGSYSCloneFailure(const struct arch_seccomp_data& args, void* aux) { + static const char kSeccompCloneError[] = + __FILE__":**CRASHING**:" SECCOMP_MESSAGE_CLONE_CONTENT "\n"; + WriteToStdErr(kSeccompCloneError, sizeof(kSeccompCloneError) - 1); + // "flags" is the first argument in the kernel's clone(). + // Mark as volatile to be able to find the value on the stack in a minidump. + volatile uint64_t clone_flags = args.args[0]; + volatile char* addr; + if (IsArchitectureX86_64()) { + addr = reinterpret_cast<volatile char*>(clone_flags & 0xFFFFFF); + *addr = '\0'; + } + // Hit the NULL page if this fails to fault. + addr = reinterpret_cast<volatile char*>(clone_flags & 0xFFF); + *addr = '\0'; + for (;;) + _exit(1); +} + +intptr_t SIGSYSPrctlFailure(const struct arch_seccomp_data& args, + void* /* aux */) { + static const char kSeccompPrctlError[] = + __FILE__":**CRASHING**:" SECCOMP_MESSAGE_PRCTL_CONTENT "\n"; + WriteToStdErr(kSeccompPrctlError, sizeof(kSeccompPrctlError) - 1); + // Mark as volatile to be able to find the value on the stack in a minidump. + volatile uint64_t option = args.args[0]; + volatile char* addr = + reinterpret_cast<volatile char*>(option & 0xFFF); + *addr = '\0'; + for (;;) + _exit(1); +} + +intptr_t SIGSYSIoctlFailure(const struct arch_seccomp_data& args, + void* /* aux */) { + static const char kSeccompIoctlError[] = + __FILE__":**CRASHING**:" SECCOMP_MESSAGE_IOCTL_CONTENT "\n"; + WriteToStdErr(kSeccompIoctlError, sizeof(kSeccompIoctlError) - 1); + // Make "request" volatile so that we can see it on the stack in a minidump. + volatile uint64_t request = args.args[1]; + volatile char* addr = reinterpret_cast<volatile char*>(request & 0xFFFF); + *addr = '\0'; + // Hit the NULL page if this fails. + addr = reinterpret_cast<volatile char*>(request & 0xFFF); + *addr = '\0'; + for (;;) + _exit(1); +} + +intptr_t SIGSYSKillFailure(const struct arch_seccomp_data& args, + void* /* aux */) { + static const char kSeccompKillError[] = + __FILE__":**CRASHING**:" SECCOMP_MESSAGE_KILL_CONTENT "\n"; + WriteToStdErr(kSeccompKillError, sizeof(kSeccompKillError) - 1); + // Make "pid" volatile so that we can see it on the stack in a minidump. + volatile uint64_t my_pid = sys_getpid(); + volatile char* addr = reinterpret_cast<volatile char*>(my_pid & 0xFFF); + *addr = '\0'; + for (;;) + _exit(1); +} + +intptr_t SIGSYSFutexFailure(const struct arch_seccomp_data& args, + void* /* aux */) { + static const char kSeccompFutexError[] = + __FILE__ ":**CRASHING**:" SECCOMP_MESSAGE_FUTEX_CONTENT "\n"; + WriteToStdErr(kSeccompFutexError, sizeof(kSeccompFutexError) - 1); + volatile int futex_op = args.args[1]; + volatile char* addr = reinterpret_cast<volatile char*>(futex_op & 0xFFF); + *addr = '\0'; + for (;;) + _exit(1); +} + +intptr_t SIGSYSSchedHandler(const struct arch_seccomp_data& args, + void* aux) { + switch (args.nr) { + case __NR_sched_getaffinity: + case __NR_sched_getattr: + case __NR_sched_getparam: + case __NR_sched_getscheduler: + case __NR_sched_rr_get_interval: + case __NR_sched_setaffinity: + case __NR_sched_setattr: + case __NR_sched_setparam: + case __NR_sched_setscheduler: + const pid_t tid = sys_gettid(); + // The first argument is the pid. If is our thread id, then replace it + // with 0, which is equivalent and allowed by the policy. + if (args.args[0] == static_cast<uint64_t>(tid)) { + return Syscall::Call(args.nr, + 0, + static_cast<intptr_t>(args.args[1]), + static_cast<intptr_t>(args.args[2]), + static_cast<intptr_t>(args.args[3]), + static_cast<intptr_t>(args.args[4]), + static_cast<intptr_t>(args.args[5])); + } + break; + } + + CrashSIGSYS_Handler(args, aux); + + // Should never be reached. + RAW_CHECK(false); + return -ENOSYS; +} + +bpf_dsl::ResultExpr CrashSIGSYS() { + return bpf_dsl::Trap(CrashSIGSYS_Handler, NULL); +} + +bpf_dsl::ResultExpr CrashSIGSYSClone() { + return bpf_dsl::Trap(SIGSYSCloneFailure, NULL); +} + +bpf_dsl::ResultExpr CrashSIGSYSPrctl() { + return bpf_dsl::Trap(SIGSYSPrctlFailure, NULL); +} + +bpf_dsl::ResultExpr CrashSIGSYSIoctl() { + return bpf_dsl::Trap(SIGSYSIoctlFailure, NULL); +} + +bpf_dsl::ResultExpr CrashSIGSYSKill() { + return bpf_dsl::Trap(SIGSYSKillFailure, NULL); +} + +bpf_dsl::ResultExpr CrashSIGSYSFutex() { + return bpf_dsl::Trap(SIGSYSFutexFailure, NULL); +} + +bpf_dsl::ResultExpr RewriteSchedSIGSYS() { + return bpf_dsl::Trap(SIGSYSSchedHandler, NULL); +} + +const char* GetErrorMessageContentForTests() { + return SECCOMP_MESSAGE_COMMON_CONTENT; +} + +const char* GetCloneErrorMessageContentForTests() { + return SECCOMP_MESSAGE_CLONE_CONTENT; +} + +const char* GetPrctlErrorMessageContentForTests() { + return SECCOMP_MESSAGE_PRCTL_CONTENT; +} + +const char* GetIoctlErrorMessageContentForTests() { + return SECCOMP_MESSAGE_IOCTL_CONTENT; +} + +const char* GetKillErrorMessageContentForTests() { + return SECCOMP_MESSAGE_KILL_CONTENT; +} + +const char* GetFutexErrorMessageContentForTests() { + return SECCOMP_MESSAGE_FUTEX_CONTENT; +} + +} // namespace sandbox. diff --git a/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h b/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h new file mode 100644 index 0000000000..c64e994172 --- /dev/null +++ b/sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h @@ -0,0 +1,82 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SIGSYS_HANDLERS_H_ +#define SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SIGSYS_HANDLERS_H_ + +#include <stdint.h> + +#include "build/build_config.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl_forward.h" +#include "sandbox/sandbox_export.h" + +// The handlers are suitable for use in Trap() error codes. They are +// guaranteed to be async-signal safe. +// See sandbox/linux/seccomp-bpf/trap.h to see how they work. + +namespace sandbox { + +struct arch_seccomp_data; + +// This handler will crash the currently running process. The crashing address +// will be the number of the current system call, extracted from |args|. +// This handler will also print to stderr the number of the crashing syscall. +SANDBOX_EXPORT intptr_t + CrashSIGSYS_Handler(const struct arch_seccomp_data& args, void* aux); + +// The following three handlers are suitable to report failures with the +// clone(), prctl() and ioctl() system calls respectively. + +// The crashing address will be (clone_flags & 0xFFFFFF), where clone_flags is +// the clone(2) argument, extracted from |args|. +SANDBOX_EXPORT intptr_t + SIGSYSCloneFailure(const struct arch_seccomp_data& args, void* aux); +// The crashing address will be (option & 0xFFF), where option is the prctl(2) +// argument. +SANDBOX_EXPORT intptr_t + SIGSYSPrctlFailure(const struct arch_seccomp_data& args, void* aux); +// The crashing address will be request & 0xFFFF, where request is the ioctl(2) +// argument. +SANDBOX_EXPORT intptr_t + SIGSYSIoctlFailure(const struct arch_seccomp_data& args, void* aux); +// The crashing address will be (pid & 0xFFF), where pid is the first +// argument (and can be a tid). +SANDBOX_EXPORT intptr_t + SIGSYSKillFailure(const struct arch_seccomp_data& args, void* aux); +// The crashing address will be (op & 0xFFF), where op is the second +// argument. +SANDBOX_EXPORT intptr_t + SIGSYSFutexFailure(const struct arch_seccomp_data& args, void* aux); +// If the syscall is not being called on the current tid, crashes in the same +// way as CrashSIGSYS_Handler. Otherwise, returns the result of calling the +// syscall with the pid argument set to 0 (which for these calls means the +// current thread). The following syscalls are supported: +// +// sched_getaffinity(), sched_getattr(), sched_getparam(), sched_getscheduler(), +// sched_rr_get_interval(), sched_setaffinity(), sched_setattr(), +// sched_setparam(), sched_setscheduler() +SANDBOX_EXPORT intptr_t + SIGSYSSchedHandler(const struct arch_seccomp_data& args, void* aux); + +// Variants of the above functions for use with bpf_dsl. +SANDBOX_EXPORT bpf_dsl::ResultExpr CrashSIGSYS(); +SANDBOX_EXPORT bpf_dsl::ResultExpr CrashSIGSYSClone(); +SANDBOX_EXPORT bpf_dsl::ResultExpr CrashSIGSYSPrctl(); +SANDBOX_EXPORT bpf_dsl::ResultExpr CrashSIGSYSIoctl(); +SANDBOX_EXPORT bpf_dsl::ResultExpr CrashSIGSYSKill(); +SANDBOX_EXPORT bpf_dsl::ResultExpr CrashSIGSYSFutex(); +SANDBOX_EXPORT bpf_dsl::ResultExpr RewriteSchedSIGSYS(); + +// Following four functions return substrings of error messages used +// in the above four functions. They are useful in death tests. +SANDBOX_EXPORT const char* GetErrorMessageContentForTests(); +SANDBOX_EXPORT const char* GetCloneErrorMessageContentForTests(); +SANDBOX_EXPORT const char* GetPrctlErrorMessageContentForTests(); +SANDBOX_EXPORT const char* GetIoctlErrorMessageContentForTests(); +SANDBOX_EXPORT const char* GetKillErrorMessageContentForTests(); +SANDBOX_EXPORT const char* GetFutexErrorMessageContentForTests(); + +} // namespace sandbox. + +#endif // SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SIGSYS_HANDLERS_H_ diff --git a/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.cc b/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.cc new file mode 100644 index 0000000000..58ffb843a8 --- /dev/null +++ b/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.cc @@ -0,0 +1,319 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h" + +#include <errno.h> +#include <fcntl.h> +#include <fcntl.h> +#include <linux/net.h> +#include <sched.h> +#include <signal.h> +#include <stdint.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include "base/logging.h" +#include "base/macros.h" +#include "base/time/time.h" +#include "build/build_config.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl.h" +#include "sandbox/linux/bpf_dsl/seccomp_macros.h" +#include "sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h" +#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" +#include "sandbox/linux/system_headers/linux_futex.h" +#include "sandbox/linux/system_headers/linux_syscalls.h" +#include "sandbox/linux/system_headers/linux_time.h" + +// PNaCl toolchain does not provide sys/ioctl.h header. +#if !defined(OS_NACL_NONSFI) +#include <sys/ioctl.h> +#endif + +#if defined(OS_ANDROID) + +#if !defined(F_DUPFD_CLOEXEC) +#define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6) +#endif + +// https://android.googlesource.com/platform/bionic/+/lollipop-release/libc/private/bionic_prctl.h +#if !defined(PR_SET_VMA) +#define PR_SET_VMA 0x53564d41 +#endif + +// https://android.googlesource.com/platform/system/core/+/lollipop-release/libcutils/sched_policy.c +#if !defined(PR_SET_TIMERSLACK_PID) +#define PR_SET_TIMERSLACK_PID 41 +#endif + +#endif // defined(OS_ANDROID) + +#if defined(__arm__) && !defined(MAP_STACK) +#define MAP_STACK 0x20000 // Daisy build environment has old headers. +#endif + +#if defined(__mips__) && !defined(MAP_STACK) +#define MAP_STACK 0x40000 +#endif +namespace { + +inline bool IsArchitectureX86_64() { +#if defined(__x86_64__) + return true; +#else + return false; +#endif +} + +inline bool IsArchitectureI386() { +#if defined(__i386__) + return true; +#else + return false; +#endif +} + +inline bool IsAndroid() { +#if defined(OS_ANDROID) + return true; +#else + return false; +#endif +} + +inline bool IsArchitectureMips() { +#if defined(__mips__) + return true; +#else + return false; +#endif +} + +} // namespace. + +#define CASES SANDBOX_BPF_DSL_CASES + +using sandbox::bpf_dsl::Allow; +using sandbox::bpf_dsl::Arg; +using sandbox::bpf_dsl::BoolExpr; +using sandbox::bpf_dsl::Error; +using sandbox::bpf_dsl::If; +using sandbox::bpf_dsl::ResultExpr; + +namespace sandbox { + +#if !defined(OS_NACL_NONSFI) +// Allow Glibc's and Android pthread creation flags, crash on any other +// thread creation attempts and EPERM attempts to use neither +// CLONE_VM, nor CLONE_THREAD, which includes all fork() implementations. +ResultExpr RestrictCloneToThreadsAndEPERMFork() { + const Arg<unsigned long> flags(0); + + // TODO(mdempsky): Extend DSL to support (flags & ~mask1) == mask2. + const uint64_t kAndroidCloneMask = CLONE_VM | CLONE_FS | CLONE_FILES | + CLONE_SIGHAND | CLONE_THREAD | + CLONE_SYSVSEM; + const uint64_t kObsoleteAndroidCloneMask = kAndroidCloneMask | CLONE_DETACHED; + + const uint64_t kGlibcPthreadFlags = + CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | + CLONE_SYSVSEM | CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID; + const BoolExpr glibc_test = flags == kGlibcPthreadFlags; + + const BoolExpr android_test = flags == kAndroidCloneMask || + flags == kObsoleteAndroidCloneMask || + flags == kGlibcPthreadFlags; + + return If(IsAndroid() ? android_test : glibc_test, Allow()) + .ElseIf((flags & (CLONE_VM | CLONE_THREAD)) == 0, Error(EPERM)) + .Else(CrashSIGSYSClone()); +} + +ResultExpr RestrictPrctl() { + // Will need to add seccomp compositing in the future. PR_SET_PTRACER is + // used by breakpad but not needed anymore. + const Arg<int> option(0); + return Switch(option) + .CASES((PR_GET_NAME, PR_SET_NAME, PR_GET_DUMPABLE, PR_SET_DUMPABLE), + Allow()) +#if defined(OS_ANDROID) + .CASES((PR_SET_VMA, PR_SET_TIMERSLACK_PID), Allow()) +#endif + .Default(CrashSIGSYSPrctl()); +} + +ResultExpr RestrictIoctl() { + const Arg<int> request(1); + return Switch(request).CASES((TCGETS, FIONREAD), Allow()).Default( + CrashSIGSYSIoctl()); +} + +ResultExpr RestrictMmapFlags() { + // The flags you see are actually the allowed ones, and the variable is a + // "denied" mask because of the negation operator. + // Significantly, we don't permit MAP_HUGETLB, or the newer flags such as + // MAP_POPULATE. + // TODO(davidung), remove MAP_DENYWRITE with updated Tegra libraries. + const uint64_t kAllowedMask = MAP_SHARED | MAP_PRIVATE | MAP_ANONYMOUS | + MAP_STACK | MAP_NORESERVE | MAP_FIXED | + MAP_DENYWRITE; + const Arg<int> flags(3); + return If((flags & ~kAllowedMask) == 0, Allow()).Else(CrashSIGSYS()); +} + +ResultExpr RestrictMprotectFlags() { + // The flags you see are actually the allowed ones, and the variable is a + // "denied" mask because of the negation operator. + // Significantly, we don't permit weird undocumented flags such as + // PROT_GROWSDOWN. + const uint64_t kAllowedMask = PROT_READ | PROT_WRITE | PROT_EXEC; + const Arg<int> prot(2); + return If((prot & ~kAllowedMask) == 0, Allow()).Else(CrashSIGSYS()); +} + +ResultExpr RestrictFcntlCommands() { + // We also restrict the flags in F_SETFL. We don't want to permit flags with + // a history of trouble such as O_DIRECT. The flags you see are actually the + // allowed ones, and the variable is a "denied" mask because of the negation + // operator. + // Glibc overrides the kernel's O_LARGEFILE value. Account for this. + uint64_t kOLargeFileFlag = O_LARGEFILE; + if (IsArchitectureX86_64() || IsArchitectureI386() || IsArchitectureMips()) + kOLargeFileFlag = 0100000; + + const Arg<int> cmd(1); + const Arg<long> long_arg(2); + + const uint64_t kAllowedMask = O_ACCMODE | O_APPEND | O_NONBLOCK | O_SYNC | + kOLargeFileFlag | O_CLOEXEC | O_NOATIME; + return Switch(cmd) + .CASES((F_GETFL, + F_GETFD, + F_SETFD, + F_SETLK, + F_SETLKW, + F_GETLK, + F_DUPFD, + F_DUPFD_CLOEXEC), + Allow()) + .Case(F_SETFL, + If((long_arg & ~kAllowedMask) == 0, Allow()).Else(CrashSIGSYS())) + .Default(CrashSIGSYS()); +} + +#if defined(__i386__) || defined(__mips__) +ResultExpr RestrictSocketcallCommand() { + // Unfortunately, we are unable to restrict the first parameter to + // socketpair(2). Whilst initially sounding bad, it's noteworthy that very + // few protocols actually support socketpair(2). The scary call that we're + // worried about, socket(2), remains blocked. + const Arg<int> call(0); + return Switch(call) + .CASES((SYS_SOCKETPAIR, + SYS_SHUTDOWN, + SYS_RECV, + SYS_SEND, + SYS_RECVFROM, + SYS_SENDTO, + SYS_RECVMSG, + SYS_SENDMSG), + Allow()) + .Default(Error(EPERM)); +} +#endif + +ResultExpr RestrictKillTarget(pid_t target_pid, int sysno) { + switch (sysno) { + case __NR_kill: + case __NR_tgkill: { + const Arg<pid_t> pid(0); + return If(pid == target_pid, Allow()).Else(CrashSIGSYSKill()); + } + case __NR_tkill: + return CrashSIGSYSKill(); + default: + NOTREACHED(); + return CrashSIGSYS(); + } +} + +ResultExpr RestrictFutex() { + const uint64_t kAllowedFutexFlags = FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME; + const Arg<int> op(1); + return Switch(op & ~kAllowedFutexFlags) + .CASES((FUTEX_WAIT, + FUTEX_WAKE, + FUTEX_REQUEUE, + FUTEX_CMP_REQUEUE, + FUTEX_WAKE_OP, + FUTEX_WAIT_BITSET, + FUTEX_WAKE_BITSET), + Allow()) + .Default(CrashSIGSYSFutex()); +} + +ResultExpr RestrictGetSetpriority(pid_t target_pid) { + const Arg<int> which(0); + const Arg<int> who(1); + return If(which == PRIO_PROCESS, + If(who == 0 || who == target_pid, Allow()).Else(Error(EPERM))) + .Else(CrashSIGSYS()); +} + +ResultExpr RestrictSchedTarget(pid_t target_pid, int sysno) { + switch (sysno) { + case __NR_sched_getaffinity: + case __NR_sched_getattr: + case __NR_sched_getparam: + case __NR_sched_getscheduler: + case __NR_sched_rr_get_interval: + case __NR_sched_setaffinity: + case __NR_sched_setattr: + case __NR_sched_setparam: + case __NR_sched_setscheduler: { + const Arg<pid_t> pid(0); + return If(pid == 0 || pid == target_pid, Allow()) + .Else(RewriteSchedSIGSYS()); + } + default: + NOTREACHED(); + return CrashSIGSYS(); + } +} + +ResultExpr RestrictPrlimit64(pid_t target_pid) { + const Arg<pid_t> pid(0); + return If(pid == 0 || pid == target_pid, Allow()).Else(CrashSIGSYS()); +} + +ResultExpr RestrictGetrusage() { + const Arg<int> who(0); + return If(who == RUSAGE_SELF, Allow()).Else(CrashSIGSYS()); +} +#endif // !defined(OS_NACL_NONSFI) + +ResultExpr RestrictClockID() { + static_assert(4 == sizeof(clockid_t), "clockid_t is not 32bit"); + const Arg<clockid_t> clockid(0); + return If( +#if defined(OS_CHROMEOS) + // Allow the special clock for Chrome OS used by Chrome tracing. + clockid == base::TraceTicks::kClockSystemTrace || +#endif + clockid == CLOCK_MONOTONIC || + clockid == CLOCK_MONOTONIC_COARSE || + clockid == CLOCK_PROCESS_CPUTIME_ID || + clockid == CLOCK_REALTIME || + clockid == CLOCK_REALTIME_COARSE || + clockid == CLOCK_THREAD_CPUTIME_ID, + Allow()).Else(CrashSIGSYS()); +} + +} // namespace sandbox. diff --git a/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h b/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h new file mode 100644 index 0000000000..9eb35d10e0 --- /dev/null +++ b/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h @@ -0,0 +1,100 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SYSCALL_PARAMETERS_RESTRICTIONS_H_ +#define SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SYSCALL_PARAMETERS_RESTRICTIONS_H_ + +#include <unistd.h> + +#include "build/build_config.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl_forward.h" +#include "sandbox/sandbox_export.h" + +// These are helpers to build seccomp-bpf policies, i.e. policies for a +// sandbox that reduces the Linux kernel's attack surface. They return a +// bpf_dsl::ResultExpr suitable to restrict certain system call parameters. + +namespace sandbox { + +// Allow clone(2) for threads. +// Reject fork(2) attempts with EPERM. +// Don't restrict on ASAN. +// Crash if anything else is attempted. +SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictCloneToThreadsAndEPERMFork(); + +// Allow PR_SET_NAME, PR_SET_DUMPABLE, PR_GET_DUMPABLE. +// Crash if anything else is attempted. +SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictPrctl(); + +// Allow TCGETS and FIONREAD. +// Crash if anything else is attempted. +SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictIoctl(); + +// Restrict the flags argument in mmap(2). +// Only allow: MAP_SHARED | MAP_PRIVATE | MAP_ANONYMOUS | +// MAP_STACK | MAP_NORESERVE | MAP_FIXED | MAP_DENYWRITE. +// Crash if any other flag is used. +SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictMmapFlags(); + +// Restrict the prot argument in mprotect(2). +// Only allow: PROT_READ | PROT_WRITE | PROT_EXEC. +SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictMprotectFlags(); + +// Restrict fcntl(2) cmd argument to: +// We allow F_GETFL, F_SETFL, F_GETFD, F_SETFD, F_DUPFD, F_DUPFD_CLOEXEC, +// F_SETLK, F_SETLKW and F_GETLK. +// Also, in F_SETFL, restrict the allowed flags to: O_ACCMODE | O_APPEND | +// O_NONBLOCK | O_SYNC | O_LARGEFILE | O_CLOEXEC | O_NOATIME. +SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictFcntlCommands(); + +#if defined(__i386__) || defined(__mips__) +// Restrict socketcall(2) to only allow socketpair(2), send(2), recv(2), +// sendto(2), recvfrom(2), shutdown(2), sendmsg(2) and recvmsg(2). +SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictSocketcallCommand(); +#endif + +// Restrict |sysno| (which must be kill, tkill or tgkill) by allowing tgkill or +// kill iff the first parameter is |target_pid|, crashing otherwise or if +// |sysno| is tkill. +SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictKillTarget(pid_t target_pid, + int sysno); + +// Crash if FUTEX_CMP_REQUEUE_PI is used in the second argument of futex(2). +SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictFutex(); + +// Crash if |which| is not PRIO_PROCESS. EPERM if |who| is not 0, neither +// |target_pid| while calling setpriority(2) / getpriority(2). +SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictGetSetpriority(pid_t target_pid); + +// Restricts |pid| for sched_* syscalls which take a pid as the first argument. +// We only allow calling these syscalls if the pid argument is equal to the pid +// of the sandboxed process or 0 (indicating the current thread). The following +// syscalls are supported: +// +// sched_getaffinity(), sched_getattr(), sched_getparam(), sched_getscheduler(), +// sched_rr_get_interval(), sched_setaffinity(), sched_setattr(), +// sched_setparam(), sched_setscheduler() +SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictSchedTarget(pid_t target_pid, + int sysno); + +// Restricts the |pid| argument of prlimit64 to 0 (meaning the calling process) +// or target_pid. +SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictPrlimit64(pid_t target_pid); + +// Restricts the |who| argument of getrusage to RUSAGE_SELF (meaning the calling +// process). +SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictGetrusage(); + +// Restrict |clk_id| for clock_getres(), clock_gettime() and clock_settime(). +// We allow accessing only CLOCK_MONOTONIC, CLOCK_PROCESS_CPUTIME_ID, +// CLOCK_REALTIME, and CLOCK_THREAD_CPUTIME_ID. In particular, this disallows +// access to arbitrary per-{process,thread} CPU-time clock IDs (such as those +// returned by {clock,pthread}_getcpuclockid), which can leak information +// about the state of the host OS. +// On Chrome OS, base::TraceTicks::kClockSystemTrace is also allowed. +SANDBOX_EXPORT bpf_dsl::ResultExpr RestrictClockID(); + +} // namespace sandbox. + +#endif // SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SYSCALL_PARAMETERS_RESTRICTIONS_H_ diff --git a/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions_unittests.cc b/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions_unittests.cc new file mode 100644 index 0000000000..aaed480d69 --- /dev/null +++ b/sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions_unittests.cc @@ -0,0 +1,282 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h" + +#include <errno.h> +#include <sched.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include "base/bind.h" +#include "base/synchronization/waitable_event.h" +#include "base/sys_info.h" +#include "base/threading/thread.h" +#include "base/time/time.h" +#include "build/build_config.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl.h" +#include "sandbox/linux/bpf_dsl/policy.h" +#include "sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h" +#include "sandbox/linux/seccomp-bpf/bpf_tests.h" +#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" +#include "sandbox/linux/seccomp-bpf/syscall.h" +#include "sandbox/linux/services/syscall_wrappers.h" +#include "sandbox/linux/system_headers/linux_syscalls.h" +#include "sandbox/linux/system_headers/linux_time.h" +#include "sandbox/linux/tests/unit_tests.h" + +#if !defined(OS_ANDROID) +#include "third_party/lss/linux_syscall_support.h" // for MAKE_PROCESS_CPUCLOCK +#endif + +namespace sandbox { + +namespace { + +// NOTE: most of the parameter restrictions are tested in +// baseline_policy_unittest.cc as a more end-to-end test. + +using sandbox::bpf_dsl::Allow; +using sandbox::bpf_dsl::ResultExpr; + +class RestrictClockIdPolicy : public bpf_dsl::Policy { + public: + RestrictClockIdPolicy() {} + ~RestrictClockIdPolicy() override {} + + ResultExpr EvaluateSyscall(int sysno) const override { + switch (sysno) { + case __NR_clock_gettime: + case __NR_clock_getres: + return RestrictClockID(); + default: + return Allow(); + } + } +}; + +void CheckClock(clockid_t clockid) { + struct timespec ts; + ts.tv_sec = -1; + ts.tv_nsec = -1; + BPF_ASSERT_EQ(0, clock_getres(clockid, &ts)); + BPF_ASSERT_EQ(0, ts.tv_sec); + BPF_ASSERT_LE(0, ts.tv_nsec); + ts.tv_sec = -1; + ts.tv_nsec = -1; + BPF_ASSERT_EQ(0, clock_gettime(clockid, &ts)); + BPF_ASSERT_LE(0, ts.tv_sec); + BPF_ASSERT_LE(0, ts.tv_nsec); +} + +BPF_TEST_C(ParameterRestrictions, + clock_gettime_allowed, + RestrictClockIdPolicy) { + CheckClock(CLOCK_MONOTONIC); + CheckClock(CLOCK_MONOTONIC_COARSE); + CheckClock(CLOCK_PROCESS_CPUTIME_ID); + CheckClock(CLOCK_REALTIME); + CheckClock(CLOCK_REALTIME_COARSE); + CheckClock(CLOCK_THREAD_CPUTIME_ID); +} + +BPF_DEATH_TEST_C(ParameterRestrictions, + clock_gettime_crash_monotonic_raw, + DEATH_SEGV_MESSAGE(sandbox::GetErrorMessageContentForTests()), + RestrictClockIdPolicy) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); +} + +#if defined(OS_CHROMEOS) + +// A custom BPF tester delegate to run IsRunningOnChromeOS() before +// the sandbox is enabled because we cannot run it with non-SFI BPF +// sandbox enabled. +class ClockSystemTesterDelegate : public sandbox::BPFTesterDelegate { + public: + ClockSystemTesterDelegate() + : is_running_on_chromeos_(base::SysInfo::IsRunningOnChromeOS()) {} + ~ClockSystemTesterDelegate() override {} + + scoped_ptr<sandbox::bpf_dsl::Policy> GetSandboxBPFPolicy() override { + return scoped_ptr<sandbox::bpf_dsl::Policy>(new RestrictClockIdPolicy()); + } + void RunTestFunction() override { + if (is_running_on_chromeos_) { + CheckClock(base::TraceTicks::kClockSystemTrace); + } else { + struct timespec ts; + // kClockSystemTrace is 11, which is CLOCK_THREAD_CPUTIME_ID of + // the init process (pid=1). If kernel supports this feature, + // this may succeed even if this is not running on Chrome OS. We + // just check this clock_gettime call does not crash. + clock_gettime(base::TraceTicks::kClockSystemTrace, &ts); + } + } + + private: + const bool is_running_on_chromeos_; + DISALLOW_COPY_AND_ASSIGN(ClockSystemTesterDelegate); +}; + +BPF_TEST_D(BPFTest, BPFTestWithDelegateClass, ClockSystemTesterDelegate); + +#elif defined(OS_LINUX) + +BPF_DEATH_TEST_C(ParameterRestrictions, + clock_gettime_crash_system_trace, + DEATH_SEGV_MESSAGE(sandbox::GetErrorMessageContentForTests()), + RestrictClockIdPolicy) { + struct timespec ts; + clock_gettime(base::TraceTicks::kClockSystemTrace, &ts); +} + +#endif // defined(OS_CHROMEOS) + +#if !defined(OS_ANDROID) +BPF_DEATH_TEST_C(ParameterRestrictions, + clock_gettime_crash_cpu_clock, + DEATH_SEGV_MESSAGE(sandbox::GetErrorMessageContentForTests()), + RestrictClockIdPolicy) { + // We can't use clock_getcpuclockid() because it's not implemented in newlib, + // and it might not work inside the sandbox anyway. + const pid_t kInitPID = 1; + const clockid_t kInitCPUClockID = + MAKE_PROCESS_CPUCLOCK(kInitPID, CPUCLOCK_SCHED); + + struct timespec ts; + clock_gettime(kInitCPUClockID, &ts); +} +#endif // !defined(OS_ANDROID) + +class RestrictSchedPolicy : public bpf_dsl::Policy { + public: + RestrictSchedPolicy() {} + ~RestrictSchedPolicy() override {} + + ResultExpr EvaluateSyscall(int sysno) const override { + switch (sysno) { + case __NR_sched_getparam: + return RestrictSchedTarget(getpid(), sysno); + default: + return Allow(); + } + } +}; + +void CheckSchedGetParam(pid_t pid, struct sched_param* param) { + BPF_ASSERT_EQ(0, sched_getparam(pid, param)); +} + +void SchedGetParamThread(base::WaitableEvent* thread_run) { + const pid_t pid = getpid(); + const pid_t tid = sys_gettid(); + BPF_ASSERT_NE(pid, tid); + + struct sched_param current_pid_param; + CheckSchedGetParam(pid, ¤t_pid_param); + + struct sched_param zero_param; + CheckSchedGetParam(0, &zero_param); + + struct sched_param tid_param; + CheckSchedGetParam(tid, &tid_param); + + BPF_ASSERT_EQ(zero_param.sched_priority, tid_param.sched_priority); + + // Verify that the SIGSYS handler sets errno properly. + errno = 0; + BPF_ASSERT_EQ(-1, sched_getparam(tid, NULL)); + BPF_ASSERT_EQ(EINVAL, errno); + + thread_run->Signal(); +} + +BPF_TEST_C(ParameterRestrictions, + sched_getparam_allowed, + RestrictSchedPolicy) { + base::WaitableEvent thread_run(true, false); + // Run the actual test in a new thread so that the current pid and tid are + // different. + base::Thread getparam_thread("sched_getparam_thread"); + BPF_ASSERT(getparam_thread.Start()); + getparam_thread.message_loop()->PostTask( + FROM_HERE, base::Bind(&SchedGetParamThread, &thread_run)); + BPF_ASSERT(thread_run.TimedWait(base::TimeDelta::FromMilliseconds(5000))); + getparam_thread.Stop(); +} + +BPF_DEATH_TEST_C(ParameterRestrictions, + sched_getparam_crash_non_zero, + DEATH_SEGV_MESSAGE(sandbox::GetErrorMessageContentForTests()), + RestrictSchedPolicy) { + const pid_t kInitPID = 1; + struct sched_param param; + sched_getparam(kInitPID, ¶m); +} + +class RestrictPrlimit64Policy : public bpf_dsl::Policy { + public: + RestrictPrlimit64Policy() {} + ~RestrictPrlimit64Policy() override {} + + ResultExpr EvaluateSyscall(int sysno) const override { + switch (sysno) { + case __NR_prlimit64: + return RestrictPrlimit64(getpid()); + default: + return Allow(); + } + } +}; + +BPF_TEST_C(ParameterRestrictions, prlimit64_allowed, RestrictPrlimit64Policy) { + BPF_ASSERT_EQ(0, sys_prlimit64(0, RLIMIT_AS, NULL, NULL)); + BPF_ASSERT_EQ(0, sys_prlimit64(getpid(), RLIMIT_AS, NULL, NULL)); +} + +BPF_DEATH_TEST_C(ParameterRestrictions, + prlimit64_crash_not_self, + DEATH_SEGV_MESSAGE(sandbox::GetErrorMessageContentForTests()), + RestrictPrlimit64Policy) { + const pid_t kInitPID = 1; + BPF_ASSERT_NE(kInitPID, getpid()); + sys_prlimit64(kInitPID, RLIMIT_AS, NULL, NULL); +} + +class RestrictGetrusagePolicy : public bpf_dsl::Policy { + public: + RestrictGetrusagePolicy() {} + ~RestrictGetrusagePolicy() override {} + + ResultExpr EvaluateSyscall(int sysno) const override { + switch (sysno) { + case __NR_getrusage: + return RestrictGetrusage(); + default: + return Allow(); + } + } +}; + +BPF_TEST_C(ParameterRestrictions, getrusage_allowed, RestrictGetrusagePolicy) { + struct rusage usage; + BPF_ASSERT_EQ(0, getrusage(RUSAGE_SELF, &usage)); +} + +BPF_DEATH_TEST_C(ParameterRestrictions, + getrusage_crash_not_self, + DEATH_SEGV_MESSAGE(sandbox::GetErrorMessageContentForTests()), + RestrictGetrusagePolicy) { + struct rusage usage; + getrusage(RUSAGE_CHILDREN, &usage); +} + +} // namespace + +} // namespace sandbox diff --git a/sandbox/linux/seccomp-bpf-helpers/syscall_sets.cc b/sandbox/linux/seccomp-bpf-helpers/syscall_sets.cc new file mode 100644 index 0000000000..c217d47e2d --- /dev/null +++ b/sandbox/linux/seccomp-bpf-helpers/syscall_sets.cc @@ -0,0 +1,1060 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/seccomp-bpf-helpers/syscall_sets.h" + +#include "build/build_config.h" +#include "sandbox/linux/system_headers/linux_syscalls.h" + +namespace sandbox { + +// The functions below cover all existing i386, x86_64, and ARM system calls; +// excluding syscalls made obsolete in ARM EABI. +// The implicitly defined sets form a partition of the sets of +// system calls. + +bool SyscallSets::IsKill(int sysno) { + switch (sysno) { + case __NR_kill: + case __NR_tgkill: + case __NR_tkill: // Deprecated. + return true; + default: + return false; + } +} + +bool SyscallSets::IsAllowedGettime(int sysno) { + switch (sysno) { + case __NR_gettimeofday: +#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) + case __NR_time: +#endif + return true; + case __NR_adjtimex: // Privileged. + case __NR_clock_adjtime: // Privileged. + case __NR_clock_getres: // Could be allowed. + case __NR_clock_gettime: + case __NR_clock_nanosleep: // Could be allowed. + case __NR_clock_settime: // Privileged. +#if defined(__i386__) || defined(__mips__) + case __NR_ftime: // Obsolete. +#endif + case __NR_settimeofday: // Privileged. +#if defined(__i386__) || defined(__mips__) + case __NR_stime: +#endif + default: + return false; + } +} + +bool SyscallSets::IsCurrentDirectory(int sysno) { + switch (sysno) { + case __NR_getcwd: + case __NR_chdir: + case __NR_fchdir: + return true; + default: + return false; + } +} + +bool SyscallSets::IsUmask(int sysno) { + switch (sysno) { + case __NR_umask: + return true; + default: + return false; + } +} + +// System calls that directly access the file system. They might acquire +// a new file descriptor or otherwise perform an operation directly +// via a path. +// Both EPERM and ENOENT are valid errno unless otherwise noted in comment. +bool SyscallSets::IsFileSystem(int sysno) { + switch (sysno) { +#if !defined(__aarch64__) + case __NR_access: // EPERM not a valid errno. + case __NR_chmod: + case __NR_chown: +#if defined(__i386__) || defined(__arm__) + case __NR_chown32: +#endif + case __NR_creat: + case __NR_futimesat: // Should be called utimesat ? + case __NR_lchown: + case __NR_link: + case __NR_lstat: // EPERM not a valid errno. + case __NR_mkdir: + case __NR_mknod: + case __NR_open: + case __NR_readlink: // EPERM not a valid errno. + case __NR_rename: + case __NR_rmdir: + case __NR_stat: // EPERM not a valid errno. + case __NR_symlink: + case __NR_unlink: + case __NR_uselib: // Neither EPERM, nor ENOENT are valid errno. + case __NR_ustat: // Same as above. Deprecated. + case __NR_utimes: +#endif // !defined(__aarch64__) + + case __NR_execve: + case __NR_faccessat: // EPERM not a valid errno. + case __NR_fchmodat: + case __NR_fchownat: // Should be called chownat ? +#if defined(__x86_64__) || defined(__aarch64__) + case __NR_newfstatat: // fstatat(). EPERM not a valid errno. +#elif defined(__i386__) || defined(__arm__) || defined(__mips__) + case __NR_fstatat64: +#endif +#if defined(__i386__) || defined(__arm__) + case __NR_lchown32: +#endif + case __NR_linkat: + case __NR_lookup_dcookie: // ENOENT not a valid errno. + +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + case __NR_lstat64: +#endif +#if defined(__i386__) || defined(__arm__) || defined(__x86_64__) + case __NR_memfd_create: +#endif + case __NR_mkdirat: + case __NR_mknodat: +#if defined(__i386__) + case __NR_oldlstat: + case __NR_oldstat: +#endif + case __NR_openat: + case __NR_readlinkat: + case __NR_renameat: + case __NR_renameat2: +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + case __NR_stat64: +#endif + case __NR_statfs: // EPERM not a valid errno. +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + case __NR_statfs64: +#endif + case __NR_symlinkat: + case __NR_truncate: +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + case __NR_truncate64: +#endif + case __NR_unlinkat: +#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) + case __NR_utime: +#endif + case __NR_utimensat: // New. + return true; + default: + return false; + } +} + +bool SyscallSets::IsAllowedFileSystemAccessViaFd(int sysno) { + switch (sysno) { + case __NR_fstat: +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + case __NR_fstat64: +#endif + return true; +// TODO(jln): these should be denied gracefully as well (moved below). +#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) + case __NR_fadvise64: // EPERM not a valid errno. +#endif +#if defined(__i386__) + case __NR_fadvise64_64: +#endif +#if defined(__arm__) + case __NR_arm_fadvise64_64: +#endif + case __NR_fdatasync: // EPERM not a valid errno. + case __NR_flock: // EPERM not a valid errno. + case __NR_fstatfs: // Give information about the whole filesystem. +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + case __NR_fstatfs64: +#endif + case __NR_fsync: // EPERM not a valid errno. +#if defined(__i386__) + case __NR_oldfstat: +#endif +#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ + defined(__aarch64__) + case __NR_sync_file_range: // EPERM not a valid errno. +#elif defined(__arm__) + case __NR_arm_sync_file_range: // EPERM not a valid errno. +#endif + default: + return false; + } +} + +// EPERM is a good errno for any of these. +bool SyscallSets::IsDeniedFileSystemAccessViaFd(int sysno) { + switch (sysno) { + case __NR_fallocate: + case __NR_fchmod: + case __NR_fchown: + case __NR_ftruncate: +#if defined(__i386__) || defined(__arm__) + case __NR_fchown32: +#endif +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + case __NR_ftruncate64: +#endif +#if !defined(__aarch64__) + case __NR_getdents: // EPERM not a valid errno. +#endif + case __NR_getdents64: // EPERM not a valid errno. +#if defined(__i386__) || defined(__mips__) + case __NR_readdir: +#endif + return true; + default: + return false; + } +} + +bool SyscallSets::IsGetSimpleId(int sysno) { + switch (sysno) { + case __NR_capget: + case __NR_getegid: + case __NR_geteuid: + case __NR_getgid: + case __NR_getgroups: + case __NR_getpid: + case __NR_getppid: + case __NR_getresgid: + case __NR_getsid: + case __NR_gettid: + case __NR_getuid: + case __NR_getresuid: +#if defined(__i386__) || defined(__arm__) + case __NR_getegid32: + case __NR_geteuid32: + case __NR_getgid32: + case __NR_getgroups32: + case __NR_getresgid32: + case __NR_getresuid32: + case __NR_getuid32: +#endif + return true; + default: + return false; + } +} + +bool SyscallSets::IsProcessPrivilegeChange(int sysno) { + switch (sysno) { + case __NR_capset: +#if defined(__i386__) || defined(__x86_64__) + case __NR_ioperm: // Intel privilege. + case __NR_iopl: // Intel privilege. +#endif + case __NR_setfsgid: + case __NR_setfsuid: + case __NR_setgid: + case __NR_setgroups: + case __NR_setregid: + case __NR_setresgid: + case __NR_setresuid: + case __NR_setreuid: + case __NR_setuid: +#if defined(__i386__) || defined(__arm__) + case __NR_setfsgid32: + case __NR_setfsuid32: + case __NR_setgid32: + case __NR_setgroups32: + case __NR_setregid32: + case __NR_setresgid32: + case __NR_setresuid32: + case __NR_setreuid32: + case __NR_setuid32: +#endif + return true; + default: + return false; + } +} + +bool SyscallSets::IsProcessGroupOrSession(int sysno) { + switch (sysno) { + case __NR_setpgid: +#if !defined(__aarch64__) + case __NR_getpgrp: +#endif + case __NR_setsid: + case __NR_getpgid: + return true; + default: + return false; + } +} + +bool SyscallSets::IsAllowedSignalHandling(int sysno) { + switch (sysno) { + case __NR_rt_sigaction: + case __NR_rt_sigprocmask: + case __NR_rt_sigreturn: +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + case __NR_sigaction: + case __NR_sigprocmask: + case __NR_sigreturn: +#endif + return true; + case __NR_rt_sigpending: + case __NR_rt_sigqueueinfo: + case __NR_rt_sigsuspend: + case __NR_rt_sigtimedwait: + case __NR_rt_tgsigqueueinfo: + case __NR_sigaltstack: +#if !defined(__aarch64__) + case __NR_signalfd: +#endif + case __NR_signalfd4: +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + case __NR_sigpending: + case __NR_sigsuspend: +#endif +#if defined(__i386__) || defined(__mips__) + case __NR_signal: + case __NR_sgetmask: // Obsolete. + case __NR_ssetmask: +#endif + default: + return false; + } +} + +bool SyscallSets::IsAllowedOperationOnFd(int sysno) { + switch (sysno) { + case __NR_close: + case __NR_dup: +#if !defined(__aarch64__) + case __NR_dup2: +#endif + case __NR_dup3: +#if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ + defined(__aarch64__) + case __NR_shutdown: +#endif + return true; + case __NR_fcntl: +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + case __NR_fcntl64: +#endif + default: + return false; + } +} + +bool SyscallSets::IsKernelInternalApi(int sysno) { + switch (sysno) { + case __NR_restart_syscall: +#if defined(__arm__) + case __ARM_NR_cmpxchg: +#endif + return true; + default: + return false; + } +} + +// This should be thought through in conjunction with IsFutex(). +bool SyscallSets::IsAllowedProcessStartOrDeath(int sysno) { + switch (sysno) { + case __NR_exit: + case __NR_exit_group: + case __NR_wait4: + case __NR_waitid: +#if defined(__i386__) + case __NR_waitpid: +#endif + return true; + case __NR_clone: // Should be parameter-restricted. + case __NR_setns: // Privileged. +#if !defined(__aarch64__) + case __NR_fork: +#endif +#if defined(__i386__) || defined(__x86_64__) + case __NR_get_thread_area: +#endif +#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) + case __NR_set_thread_area: +#endif + case __NR_set_tid_address: + case __NR_unshare: +#if !defined(__mips__) && !defined(__aarch64__) + case __NR_vfork: +#endif + default: + return false; + } +} + +// It's difficult to restrict those, but there is attack surface here. +bool SyscallSets::IsAllowedFutex(int sysno) { + switch (sysno) { + case __NR_get_robust_list: + case __NR_set_robust_list: + case __NR_futex: + default: + return false; + } +} + +bool SyscallSets::IsAllowedEpoll(int sysno) { + switch (sysno) { +#if !defined(__aarch64__) + case __NR_epoll_create: + case __NR_epoll_wait: +#endif + case __NR_epoll_create1: + case __NR_epoll_ctl: + return true; + default: +#if defined(__x86_64__) + case __NR_epoll_ctl_old: +#endif + case __NR_epoll_pwait: +#if defined(__x86_64__) + case __NR_epoll_wait_old: +#endif + return false; + } +} + +bool SyscallSets::IsAllowedGetOrModifySocket(int sysno) { + switch (sysno) { +#if !defined(__aarch64__) + case __NR_pipe: +#endif + case __NR_pipe2: + return true; + default: +#if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ + defined(__aarch64__) + case __NR_socketpair: // We will want to inspect its argument. +#endif + return false; + } +} + +bool SyscallSets::IsDeniedGetOrModifySocket(int sysno) { + switch (sysno) { +#if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ + defined(__aarch64__) + case __NR_accept: + case __NR_accept4: + case __NR_bind: + case __NR_connect: + case __NR_socket: + case __NR_listen: + return true; +#endif + default: + return false; + } +} + +#if defined(__i386__) || defined(__mips__) +// Big multiplexing system call for sockets. +bool SyscallSets::IsSocketCall(int sysno) { + switch (sysno) { + case __NR_socketcall: + return true; + default: + return false; + } +} +#endif + +#if defined(__x86_64__) || defined(__arm__) || defined(__mips__) +bool SyscallSets::IsNetworkSocketInformation(int sysno) { + switch (sysno) { + case __NR_getpeername: + case __NR_getsockname: + case __NR_getsockopt: + case __NR_setsockopt: + return true; + default: + return false; + } +} +#endif + +bool SyscallSets::IsAllowedAddressSpaceAccess(int sysno) { + switch (sysno) { + case __NR_brk: + case __NR_mlock: + case __NR_munlock: + case __NR_munmap: + return true; + case __NR_madvise: + case __NR_mincore: + case __NR_mlockall: +#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ + defined(__aarch64__) + case __NR_mmap: +#endif +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + case __NR_mmap2: +#endif +#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) + case __NR_modify_ldt: +#endif + case __NR_mprotect: + case __NR_mremap: + case __NR_msync: + case __NR_munlockall: + case __NR_readahead: + case __NR_remap_file_pages: +#if defined(__i386__) + case __NR_vm86: + case __NR_vm86old: +#endif + default: + return false; + } +} + +bool SyscallSets::IsAllowedGeneralIo(int sysno) { + switch (sysno) { + case __NR_lseek: +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + case __NR__llseek: +#endif +#if !defined(__aarch64__) + case __NR_poll: +#endif + case __NR_ppoll: + case __NR_pselect6: + case __NR_read: + case __NR_readv: +#if defined(__arm__) || defined(__mips__) + case __NR_recv: +#endif +#if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ + defined(__aarch64__) + case __NR_recvfrom: // Could specify source. + case __NR_recvmsg: // Could specify source. +#endif +#if defined(__i386__) || defined(__x86_64__) + case __NR_select: +#endif +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + case __NR__newselect: +#endif +#if defined(__arm__) + case __NR_send: +#endif +#if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ + defined(__aarch64__) + case __NR_sendmsg: // Could specify destination. + case __NR_sendto: // Could specify destination. +#endif + case __NR_write: + case __NR_writev: + return true; + case __NR_ioctl: // Can be very powerful. + case __NR_pread64: + case __NR_preadv: + case __NR_pwrite64: + case __NR_pwritev: + case __NR_recvmmsg: // Could specify source. + case __NR_sendfile: +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + case __NR_sendfile64: +#endif + case __NR_sendmmsg: // Could specify destination. + case __NR_splice: + case __NR_tee: + case __NR_vmsplice: + default: + return false; + } +} + +bool SyscallSets::IsPrctl(int sysno) { + switch (sysno) { +#if defined(__x86_64__) + case __NR_arch_prctl: +#endif + case __NR_prctl: + return true; + default: + return false; + } +} + +bool SyscallSets::IsSeccomp(int sysno) { + switch (sysno) { + case __NR_seccomp: + return true; + default: + return false; + } +} + +bool SyscallSets::IsAllowedBasicScheduler(int sysno) { + switch (sysno) { + case __NR_sched_yield: +#if !defined(__aarch64__) + case __NR_pause: +#endif + case __NR_nanosleep: + return true; + case __NR_getpriority: +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + case __NR_nice: +#endif + case __NR_setpriority: + default: + return false; + } +} + +bool SyscallSets::IsAdminOperation(int sysno) { + switch (sysno) { +#if defined(__i386__) || defined(__arm__) || defined(__mips__) + case __NR_bdflush: +#endif + case __NR_kexec_load: + case __NR_reboot: + case __NR_setdomainname: + case __NR_sethostname: + case __NR_syslog: + return true; + default: + return false; + } +} + +bool SyscallSets::IsKernelModule(int sysno) { + switch (sysno) { +#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) + case __NR_create_module: + case __NR_get_kernel_syms: // Should ENOSYS. + case __NR_query_module: +#endif + case __NR_delete_module: + case __NR_init_module: + case __NR_finit_module: + return true; + default: + return false; + } +} + +bool SyscallSets::IsGlobalFSViewChange(int sysno) { + switch (sysno) { + case __NR_pivot_root: + case __NR_chroot: + case __NR_sync: + return true; + default: + return false; + } +} + +bool SyscallSets::IsFsControl(int sysno) { + switch (sysno) { + case __NR_mount: + case __NR_nfsservctl: + case __NR_quotactl: + case __NR_swapoff: + case __NR_swapon: +#if defined(__i386__) || defined(__mips__) + case __NR_umount: +#endif + case __NR_umount2: + return true; + default: + return false; + } +} + +bool SyscallSets::IsNuma(int sysno) { + switch (sysno) { + case __NR_get_mempolicy: + case __NR_getcpu: + case __NR_mbind: +#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ + defined(__aarch64__) + case __NR_migrate_pages: +#endif + case __NR_move_pages: + case __NR_set_mempolicy: + return true; + default: + return false; + } +} + +bool SyscallSets::IsMessageQueue(int sysno) { + switch (sysno) { + case __NR_mq_getsetattr: + case __NR_mq_notify: + case __NR_mq_open: + case __NR_mq_timedreceive: + case __NR_mq_timedsend: + case __NR_mq_unlink: + return true; + default: + return false; + } +} + +bool SyscallSets::IsGlobalProcessEnvironment(int sysno) { + switch (sysno) { + case __NR_acct: // Privileged. +#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) || \ + defined(__aarch64__) + case __NR_getrlimit: +#endif +#if defined(__i386__) || defined(__arm__) + case __NR_ugetrlimit: +#endif +#if defined(__i386__) || defined(__mips__) + case __NR_ulimit: +#endif + case __NR_getrusage: + case __NR_personality: // Can change its personality as well. + case __NR_prlimit64: // Like setrlimit / getrlimit. + case __NR_setrlimit: + case __NR_times: + return true; + default: + return false; + } +} + +bool SyscallSets::IsDebug(int sysno) { + switch (sysno) { + case __NR_ptrace: + case __NR_process_vm_readv: + case __NR_process_vm_writev: + case __NR_kcmp: + return true; + default: + return false; + } +} + +bool SyscallSets::IsGlobalSystemStatus(int sysno) { + switch (sysno) { +#if !defined(__aarch64__) + case __NR__sysctl: + case __NR_sysfs: +#endif + case __NR_sysinfo: + case __NR_uname: +#if defined(__i386__) + case __NR_olduname: + case __NR_oldolduname: +#endif + return true; + default: + return false; + } +} + +bool SyscallSets::IsEventFd(int sysno) { + switch (sysno) { +#if !defined(__aarch64__) + case __NR_eventfd: +#endif + case __NR_eventfd2: + return true; + default: + return false; + } +} + +// Asynchronous I/O API. +bool SyscallSets::IsAsyncIo(int sysno) { + switch (sysno) { + case __NR_io_cancel: + case __NR_io_destroy: + case __NR_io_getevents: + case __NR_io_setup: + case __NR_io_submit: + return true; + default: + return false; + } +} + +bool SyscallSets::IsKeyManagement(int sysno) { + switch (sysno) { + case __NR_add_key: + case __NR_keyctl: + case __NR_request_key: + return true; + default: + return false; + } +} + +#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) +bool SyscallSets::IsSystemVSemaphores(int sysno) { + switch (sysno) { + case __NR_semctl: + case __NR_semget: + case __NR_semop: + case __NR_semtimedop: + return true; + default: + return false; + } +} +#endif + +#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) +// These give a lot of ambient authority and bypass the setuid sandbox. +bool SyscallSets::IsSystemVSharedMemory(int sysno) { + switch (sysno) { + case __NR_shmat: + case __NR_shmctl: + case __NR_shmdt: + case __NR_shmget: + return true; + default: + return false; + } +} +#endif + +#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) +bool SyscallSets::IsSystemVMessageQueue(int sysno) { + switch (sysno) { + case __NR_msgctl: + case __NR_msgget: + case __NR_msgrcv: + case __NR_msgsnd: + return true; + default: + return false; + } +} +#endif + +#if defined(__i386__) || defined(__mips__) +// Big system V multiplexing system call. +bool SyscallSets::IsSystemVIpc(int sysno) { + switch (sysno) { + case __NR_ipc: + return true; + default: + return false; + } +} +#endif + +bool SyscallSets::IsAnySystemV(int sysno) { +#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) + return IsSystemVMessageQueue(sysno) || IsSystemVSemaphores(sysno) || + IsSystemVSharedMemory(sysno); +#elif defined(__i386__) || defined(__mips__) + return IsSystemVIpc(sysno); +#endif +} + +bool SyscallSets::IsAdvancedScheduler(int sysno) { + switch (sysno) { + case __NR_ioprio_get: // IO scheduler. + case __NR_ioprio_set: + case __NR_sched_get_priority_max: + case __NR_sched_get_priority_min: + case __NR_sched_getaffinity: + case __NR_sched_getattr: + case __NR_sched_getparam: + case __NR_sched_getscheduler: + case __NR_sched_rr_get_interval: + case __NR_sched_setaffinity: + case __NR_sched_setattr: + case __NR_sched_setparam: + case __NR_sched_setscheduler: + return true; + default: + return false; + } +} + +bool SyscallSets::IsInotify(int sysno) { + switch (sysno) { + case __NR_inotify_add_watch: +#if !defined(__aarch64__) + case __NR_inotify_init: +#endif + case __NR_inotify_init1: + case __NR_inotify_rm_watch: + return true; + default: + return false; + } +} + +bool SyscallSets::IsFaNotify(int sysno) { + switch (sysno) { + case __NR_fanotify_init: + case __NR_fanotify_mark: + return true; + default: + return false; + } +} + +bool SyscallSets::IsTimer(int sysno) { + switch (sysno) { + case __NR_getitimer: +#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) + case __NR_alarm: +#endif + case __NR_setitimer: + return true; + default: + return false; + } +} + +bool SyscallSets::IsAdvancedTimer(int sysno) { + switch (sysno) { + case __NR_timer_create: + case __NR_timer_delete: + case __NR_timer_getoverrun: + case __NR_timer_gettime: + case __NR_timer_settime: + case __NR_timerfd_create: + case __NR_timerfd_gettime: + case __NR_timerfd_settime: + return true; + default: + return false; + } +} + +bool SyscallSets::IsExtendedAttributes(int sysno) { + switch (sysno) { + case __NR_fgetxattr: + case __NR_flistxattr: + case __NR_fremovexattr: + case __NR_fsetxattr: + case __NR_getxattr: + case __NR_lgetxattr: + case __NR_listxattr: + case __NR_llistxattr: + case __NR_lremovexattr: + case __NR_lsetxattr: + case __NR_removexattr: + case __NR_setxattr: + return true; + default: + return false; + } +} + +// Various system calls that need to be researched. +// TODO(jln): classify this better. +bool SyscallSets::IsMisc(int sysno) { + switch (sysno) { +#if !defined(__mips__) + case __NR_getrandom: +#endif + case __NR_name_to_handle_at: + case __NR_open_by_handle_at: + case __NR_perf_event_open: + case __NR_syncfs: + case __NR_vhangup: +// The system calls below are not implemented. +#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) + case __NR_afs_syscall: +#endif +#if defined(__i386__) || defined(__mips__) + case __NR_break: +#endif +#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) + case __NR_getpmsg: +#endif +#if defined(__i386__) || defined(__mips__) + case __NR_gtty: + case __NR_idle: + case __NR_lock: + case __NR_mpx: + case __NR_prof: + case __NR_profil: +#endif +#if defined(__i386__) || defined(__x86_64__) || defined(__mips__) + case __NR_putpmsg: +#endif +#if defined(__x86_64__) + case __NR_security: +#endif +#if defined(__i386__) || defined(__mips__) + case __NR_stty: +#endif +#if defined(__x86_64__) + case __NR_tuxcall: +#endif +#if !defined(__aarch64__) + case __NR_vserver: +#endif + return true; + default: + return false; + } +} + +#if defined(__arm__) +bool SyscallSets::IsArmPciConfig(int sysno) { + switch (sysno) { + case __NR_pciconfig_iobase: + case __NR_pciconfig_read: + case __NR_pciconfig_write: + return true; + default: + return false; + } +} + +bool SyscallSets::IsArmPrivate(int sysno) { + switch (sysno) { + case __ARM_NR_breakpoint: + case __ARM_NR_cacheflush: + case __ARM_NR_set_tls: + case __ARM_NR_usr26: + case __ARM_NR_usr32: + return true; + default: + return false; + } +} +#endif // defined(__arm__) + +#if defined(__mips__) +bool SyscallSets::IsMipsPrivate(int sysno) { + switch (sysno) { + case __NR_cacheflush: + case __NR_cachectl: + return true; + default: + return false; + } +} + +bool SyscallSets::IsMipsMisc(int sysno) { + switch (sysno) { + case __NR_sysmips: + case __NR_unused150: + return true; + default: + return false; + } +} +#endif // defined(__mips__) +} // namespace sandbox. diff --git a/sandbox/linux/seccomp-bpf-helpers/syscall_sets.h b/sandbox/linux/seccomp-bpf-helpers/syscall_sets.h new file mode 100644 index 0000000000..5ba6335a95 --- /dev/null +++ b/sandbox/linux/seccomp-bpf-helpers/syscall_sets.h @@ -0,0 +1,112 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SYSCALL_SETS_H_ +#define SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SYSCALL_SETS_H_ + +#include "base/macros.h" +#include "build/build_config.h" +#include "sandbox/sandbox_export.h" + +// These are helpers to build seccomp-bpf policies, i.e. policies for a +// sandbox that reduces the Linux kernel's attack surface. Given their +// nature, they don't have any clear semantics and are completely +// "implementation-defined". + +namespace sandbox { + +class SANDBOX_EXPORT SyscallSets { + public: + static bool IsKill(int sysno); + static bool IsAllowedGettime(int sysno); + static bool IsCurrentDirectory(int sysno); + static bool IsUmask(int sysno); + // System calls that directly access the file system. They might acquire + // a new file descriptor or otherwise perform an operation directly + // via a path. + static bool IsFileSystem(int sysno); + static bool IsAllowedFileSystemAccessViaFd(int sysno); + static bool IsDeniedFileSystemAccessViaFd(int sysno); + static bool IsGetSimpleId(int sysno); + static bool IsProcessPrivilegeChange(int sysno); + static bool IsProcessGroupOrSession(int sysno); + static bool IsAllowedSignalHandling(int sysno); + static bool IsAllowedOperationOnFd(int sysno); + static bool IsKernelInternalApi(int sysno); + // This should be thought through in conjunction with IsFutex(). + static bool IsAllowedProcessStartOrDeath(int sysno); + // It's difficult to restrict those, but there is attack surface here. + static bool IsAllowedFutex(int sysno); + static bool IsAllowedEpoll(int sysno); + static bool IsAllowedGetOrModifySocket(int sysno); + static bool IsDeniedGetOrModifySocket(int sysno); + +#if defined(__i386__) || defined(__mips__) + // Big multiplexing system call for sockets. + static bool IsSocketCall(int sysno); +#endif + +#if defined(__x86_64__) || defined(__arm__) || defined(__mips__) || \ + defined(__aarch64__) + static bool IsNetworkSocketInformation(int sysno); +#endif + + static bool IsAllowedAddressSpaceAccess(int sysno); + static bool IsAllowedGeneralIo(int sysno); + static bool IsPrctl(int sysno); + static bool IsSeccomp(int sysno); + static bool IsAllowedBasicScheduler(int sysno); + static bool IsAdminOperation(int sysno); + static bool IsKernelModule(int sysno); + static bool IsGlobalFSViewChange(int sysno); + static bool IsFsControl(int sysno); + static bool IsNuma(int sysno); + static bool IsMessageQueue(int sysno); + static bool IsGlobalProcessEnvironment(int sysno); + static bool IsDebug(int sysno); + static bool IsGlobalSystemStatus(int sysno); + static bool IsEventFd(int sysno); + // Asynchronous I/O API. + static bool IsAsyncIo(int sysno); + static bool IsKeyManagement(int sysno); +#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) + static bool IsSystemVSemaphores(int sysno); +#endif +#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) + // These give a lot of ambient authority and bypass the setuid sandbox. + static bool IsSystemVSharedMemory(int sysno); +#endif + +#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) + static bool IsSystemVMessageQueue(int sysno); +#endif + +#if defined(__i386__) || defined(__mips__) + // Big system V multiplexing system call. + static bool IsSystemVIpc(int sysno); +#endif + + static bool IsAnySystemV(int sysno); + static bool IsAdvancedScheduler(int sysno); + static bool IsInotify(int sysno); + static bool IsFaNotify(int sysno); + static bool IsTimer(int sysno); + static bool IsAdvancedTimer(int sysno); + static bool IsExtendedAttributes(int sysno); + static bool IsMisc(int sysno); +#if defined(__arm__) + static bool IsArmPciConfig(int sysno); + static bool IsArmPrivate(int sysno); +#endif // defined(__arm__) +#if defined(__mips__) + static bool IsMipsPrivate(int sysno); + static bool IsMipsMisc(int sysno); +#endif // defined(__mips__) + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(SyscallSets); +}; + +} // namespace sandbox. + +#endif // SANDBOX_LINUX_SECCOMP_BPF_HELPERS_SYSCALL_SETS_H_ diff --git a/sandbox/linux/seccomp-bpf/bpf_tests_unittest.cc b/sandbox/linux/seccomp-bpf/bpf_tests_unittest.cc new file mode 100644 index 0000000000..63e1814c90 --- /dev/null +++ b/sandbox/linux/seccomp-bpf/bpf_tests_unittest.cc @@ -0,0 +1,153 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/seccomp-bpf/bpf_tests.h" + +#include <errno.h> +#include <sys/ptrace.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include "base/logging.h" +#include "base/memory/scoped_ptr.h" +#include "build/build_config.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl.h" +#include "sandbox/linux/bpf_dsl/policy.h" +#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" +#include "sandbox/linux/services/syscall_wrappers.h" +#include "sandbox/linux/system_headers/linux_syscalls.h" +#include "sandbox/linux/tests/unit_tests.h" +#include "testing/gtest/include/gtest/gtest.h" + +using sandbox::bpf_dsl::Allow; +using sandbox::bpf_dsl::Error; +using sandbox::bpf_dsl::ResultExpr; + +namespace sandbox { + +namespace { + +class FourtyTwo { + public: + static const int kMagicValue = 42; + FourtyTwo() : value_(kMagicValue) {} + int value() { return value_; } + + private: + int value_; + DISALLOW_COPY_AND_ASSIGN(FourtyTwo); +}; + +class EmptyClassTakingPolicy : public bpf_dsl::Policy { + public: + explicit EmptyClassTakingPolicy(FourtyTwo* fourty_two) { + BPF_ASSERT(fourty_two); + BPF_ASSERT(FourtyTwo::kMagicValue == fourty_two->value()); + } + ~EmptyClassTakingPolicy() override {} + + ResultExpr EvaluateSyscall(int sysno) const override { + DCHECK(SandboxBPF::IsValidSyscallNumber(sysno)); + return Allow(); + } +}; + +BPF_TEST(BPFTest, + BPFAUXPointsToClass, + EmptyClassTakingPolicy, + FourtyTwo /* *BPF_AUX */) { + // BPF_AUX should point to an instance of FourtyTwo. + BPF_ASSERT(BPF_AUX); + BPF_ASSERT(FourtyTwo::kMagicValue == BPF_AUX->value()); +} + +void DummyTestFunction(FourtyTwo *fourty_two) { +} + +TEST(BPFTest, BPFTesterCompatibilityDelegateLeakTest) { + // Don't do anything, simply gives dynamic tools an opportunity to detect + // leaks. + { + BPFTesterCompatibilityDelegate<EmptyClassTakingPolicy, FourtyTwo> + simple_delegate(DummyTestFunction); + } + { + // Test polymorphism. + scoped_ptr<BPFTesterDelegate> simple_delegate( + new BPFTesterCompatibilityDelegate<EmptyClassTakingPolicy, FourtyTwo>( + DummyTestFunction)); + } +} + +class EnosysPtracePolicy : public bpf_dsl::Policy { + public: + EnosysPtracePolicy() { my_pid_ = sys_getpid(); } + ~EnosysPtracePolicy() override { + // Policies should be able to bind with the process on which they are + // created. They should never be created in a parent process. + BPF_ASSERT_EQ(my_pid_, sys_getpid()); + } + + ResultExpr EvaluateSyscall(int system_call_number) const override { + CHECK(SandboxBPF::IsValidSyscallNumber(system_call_number)); + if (system_call_number == __NR_ptrace) { + // The EvaluateSyscall function should run in the process that created + // the current object. + BPF_ASSERT_EQ(my_pid_, sys_getpid()); + return Error(ENOSYS); + } else { + return Allow(); + } + } + + private: + pid_t my_pid_; + DISALLOW_COPY_AND_ASSIGN(EnosysPtracePolicy); +}; + +class BasicBPFTesterDelegate : public BPFTesterDelegate { + public: + BasicBPFTesterDelegate() {} + ~BasicBPFTesterDelegate() override {} + + scoped_ptr<bpf_dsl::Policy> GetSandboxBPFPolicy() override { + return scoped_ptr<bpf_dsl::Policy>(new EnosysPtracePolicy()); + } + void RunTestFunction() override { + errno = 0; + int ret = ptrace(PTRACE_TRACEME, -1, NULL, NULL); + BPF_ASSERT(-1 == ret); + BPF_ASSERT(ENOSYS == errno); + } + + private: + DISALLOW_COPY_AND_ASSIGN(BasicBPFTesterDelegate); +}; + +// This is the most powerful and complex way to create a BPF test, but it +// requires a full class definition (BasicBPFTesterDelegate). +BPF_TEST_D(BPFTest, BPFTestWithDelegateClass, BasicBPFTesterDelegate); + +// This is the simplest form of BPF tests. +BPF_TEST_C(BPFTest, BPFTestWithInlineTest, EnosysPtracePolicy) { + errno = 0; + int ret = ptrace(PTRACE_TRACEME, -1, NULL, NULL); + BPF_ASSERT(-1 == ret); + BPF_ASSERT(ENOSYS == errno); +} + +const char kHelloMessage[] = "Hello"; + +BPF_DEATH_TEST_C(BPFTest, + BPFDeathTestWithInlineTest, + DEATH_MESSAGE(kHelloMessage), + EnosysPtracePolicy) { + LOG(ERROR) << kHelloMessage; + _exit(1); +} + +} // namespace + +} // namespace sandbox diff --git a/sandbox/linux/seccomp-bpf/die.cc b/sandbox/linux/seccomp-bpf/die.cc new file mode 100644 index 0000000000..3baf1f13d9 --- /dev/null +++ b/sandbox/linux/seccomp-bpf/die.cc @@ -0,0 +1,93 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/seccomp-bpf/die.h" + +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <sys/prctl.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include <string> + +#include "base/logging.h" +#include "base/posix/eintr_wrapper.h" +#include "sandbox/linux/seccomp-bpf/syscall.h" +#include "sandbox/linux/services/syscall_wrappers.h" +#include "sandbox/linux/system_headers/linux_signal.h" + +namespace sandbox { + +void Die::ExitGroup() { + // exit_group() should exit our program. After all, it is defined as a + // function that doesn't return. But things can theoretically go wrong. + // Especially, since we are dealing with system call filters. Continuing + // execution would be very bad in most cases where ExitGroup() gets called. + // So, we'll try a few other strategies too. + Syscall::Call(__NR_exit_group, 1); + + // We have no idea what our run-time environment looks like. So, signal + // handlers might or might not do the right thing. Try to reset settings + // to a defined state; but we have not way to verify whether we actually + // succeeded in doing so. Nonetheless, triggering a fatal signal could help + // us terminate. + struct sigaction sa = {}; + sa.sa_handler = LINUX_SIG_DFL; + sa.sa_flags = LINUX_SA_RESTART; + sys_sigaction(LINUX_SIGSEGV, &sa, nullptr); + Syscall::Call(__NR_prctl, PR_SET_DUMPABLE, (void*)0, (void*)0, (void*)0); + if (*(volatile char*)0) { + } + + // If there is no way for us to ask for the program to exit, the next + // best thing we can do is to loop indefinitely. Maybe, somebody will notice + // and file a bug... + // We in fact retry the system call inside of our loop so that it will + // stand out when somebody tries to diagnose the problem by using "strace". + for (;;) { + Syscall::Call(__NR_exit_group, 1); + } +} + +void Die::SandboxDie(const char* msg, const char* file, int line) { + if (simple_exit_) { + LogToStderr(msg, file, line); + } else { + logging::LogMessage(file, line, logging::LOG_FATAL).stream() << msg; + } + ExitGroup(); +} + +void Die::RawSandboxDie(const char* msg) { + if (!msg) + msg = ""; + RAW_LOG(FATAL, msg); + ExitGroup(); +} + +void Die::SandboxInfo(const char* msg, const char* file, int line) { + if (!suppress_info_) { + logging::LogMessage(file, line, logging::LOG_INFO).stream() << msg; + } +} + +void Die::LogToStderr(const char* msg, const char* file, int line) { + if (msg) { + char buf[40]; + snprintf(buf, sizeof(buf), "%d", line); + std::string s = std::string(file) + ":" + buf + ":" + msg + "\n"; + + // No need to loop. Short write()s are unlikely and if they happen we + // probably prefer them over a loop that blocks. + ignore_result( + HANDLE_EINTR(Syscall::Call(__NR_write, 2, s.c_str(), s.length()))); + } +} + +bool Die::simple_exit_ = false; +bool Die::suppress_info_ = false; + +} // namespace sandbox diff --git a/sandbox/linux/seccomp-bpf/die.h b/sandbox/linux/seccomp-bpf/die.h new file mode 100644 index 0000000000..b3f3f72c2f --- /dev/null +++ b/sandbox/linux/seccomp-bpf/die.h @@ -0,0 +1,68 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SECCOMP_BPF_DIE_H__ +#define SANDBOX_LINUX_SECCOMP_BPF_DIE_H__ + +#include "base/macros.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { + +// This is the main API for using this file. Prints a error message and +// exits with a fatal error. This is not async-signal safe. +#define SANDBOX_DIE(m) sandbox::Die::SandboxDie(m, __FILE__, __LINE__) + +// An async signal safe version of the same API. Won't print the filename +// and line numbers. +#define RAW_SANDBOX_DIE(m) sandbox::Die::RawSandboxDie(m) + +// Adds an informational message to the log file or stderr as appropriate. +#define SANDBOX_INFO(m) sandbox::Die::SandboxInfo(m, __FILE__, __LINE__) + +class SANDBOX_EXPORT Die { + public: + // Terminate the program, even if the current sandbox policy prevents some + // of the more commonly used functions used for exiting. + // Most users would want to call SANDBOX_DIE() instead, as it logs extra + // information. But calling ExitGroup() is correct and in some rare cases + // preferable. So, we make it part of the public API. + static void ExitGroup() __attribute__((noreturn)); + + // This method gets called by SANDBOX_DIE(). There is normally no reason + // to call it directly unless you are defining your own exiting macro. + static void SandboxDie(const char* msg, const char* file, int line) + __attribute__((noreturn)); + + static void RawSandboxDie(const char* msg) __attribute__((noreturn)); + + // This method gets called by SANDBOX_INFO(). There is normally no reason + // to call it directly unless you are defining your own logging macro. + static void SandboxInfo(const char* msg, const char* file, int line); + + // Writes a message to stderr. Used as a fall-back choice, if we don't have + // any other way to report an error. + static void LogToStderr(const char* msg, const char* file, int line); + + // We generally want to run all exit handlers. This means, on SANDBOX_DIE() + // we should be calling LOG(FATAL). But there are some situations where + // we just need to print a message and then terminate. This would typically + // happen in cases where we consume the error message internally (e.g. in + // unit tests or in the supportsSeccompSandbox() method). + static void EnableSimpleExit() { simple_exit_ = true; } + + // Sometimes we need to disable all informational messages (e.g. from within + // unittests). + static void SuppressInfoMessages(bool flag) { suppress_info_ = flag; } + + private: + static bool simple_exit_; + static bool suppress_info_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(Die); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SECCOMP_BPF_DIE_H__ diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.cc b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc new file mode 100644 index 0000000000..3e4f0adf53 --- /dev/null +++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.cc @@ -0,0 +1,279 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" + +// Some headers on Android are missing cdefs: crbug.com/172337. +// (We can't use OS_ANDROID here since build_config.h is not included). +#if defined(ANDROID) +#include <sys/cdefs.h> +#endif + +#include <errno.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <unistd.h> + +#include "base/compiler_specific.h" +#include "base/files/scoped_file.h" +#include "base/logging.h" +#include "base/macros.h" +#include "base/memory/scoped_ptr.h" +#include "base/posix/eintr_wrapper.h" +#include "sandbox/linux/bpf_dsl/codegen.h" +#include "sandbox/linux/bpf_dsl/policy.h" +#include "sandbox/linux/bpf_dsl/policy_compiler.h" +#include "sandbox/linux/bpf_dsl/seccomp_macros.h" +#include "sandbox/linux/bpf_dsl/syscall_set.h" +#include "sandbox/linux/seccomp-bpf/die.h" +#include "sandbox/linux/seccomp-bpf/syscall.h" +#include "sandbox/linux/seccomp-bpf/trap.h" +#include "sandbox/linux/services/proc_util.h" +#include "sandbox/linux/services/syscall_wrappers.h" +#include "sandbox/linux/services/thread_helpers.h" +#include "sandbox/linux/system_headers/linux_filter.h" +#include "sandbox/linux/system_headers/linux_seccomp.h" +#include "sandbox/linux/system_headers/linux_syscalls.h" +#include "third_party/valgrind/valgrind.h" + +namespace sandbox { + +namespace { + +bool IsRunningOnValgrind() { return RUNNING_ON_VALGRIND; } + +bool IsSingleThreaded(int proc_fd) { + return ThreadHelpers::IsSingleThreaded(proc_fd); +} + +// Check if the kernel supports seccomp-filter (a.k.a. seccomp mode 2) via +// prctl(). +bool KernelSupportsSeccompBPF() { + errno = 0; + const int rv = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, nullptr); + + if (rv == -1 && EFAULT == errno) { + return true; + } + return false; +} + +// LG introduced a buggy syscall, sys_set_media_ext, with the same number as +// seccomp. Return true if the current kernel has this buggy syscall. +// +// We want this to work with upcoming versions of seccomp, so we pass bogus +// flags that are unlikely to ever be used by the kernel. A normal kernel would +// return -EINVAL, but a buggy LG kernel would return 1. +bool KernelHasLGBug() { +#if defined(OS_ANDROID) + // sys_set_media will see this as NULL, which should be a safe (non-crashing) + // way to invoke it. A genuine seccomp syscall will see it as + // SECCOMP_SET_MODE_STRICT. + const unsigned int operation = 0; + // Chosen by fair dice roll. Guaranteed to be random. + const unsigned int flags = 0xf7a46a5c; + const int rv = sys_seccomp(operation, flags, nullptr); + // A genuine kernel would return -EINVAL (which would set rv to -1 and errno + // to EINVAL), or at the very least return some kind of error (which would + // set rv to -1). Any other behavior indicates that whatever code received + // our syscall was not the real seccomp. + if (rv != -1) { + return true; + } +#endif // defined(OS_ANDROID) + + return false; +} + +// Check if the kernel supports seccomp-filter via the seccomp system call +// and the TSYNC feature to enable seccomp on all threads. +bool KernelSupportsSeccompTsync() { + if (KernelHasLGBug()) { + return false; + } + + errno = 0; + const int rv = + sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, nullptr); + + if (rv == -1 && errno == EFAULT) { + return true; + } else { + // TODO(jln): turn these into DCHECK after 417888 is considered fixed. + CHECK_EQ(-1, rv); + CHECK(ENOSYS == errno || EINVAL == errno); + return false; + } +} + +uint64_t EscapePC() { + intptr_t rv = Syscall::Call(-1); + if (rv == -1 && errno == ENOSYS) { + return 0; + } + return static_cast<uint64_t>(static_cast<uintptr_t>(rv)); +} + +} // namespace + +SandboxBPF::SandboxBPF(bpf_dsl::Policy* policy) + : proc_fd_(), sandbox_has_started_(false), policy_(policy) { +} + +SandboxBPF::~SandboxBPF() { +} + +// static +bool SandboxBPF::SupportsSeccompSandbox(SeccompLevel level) { + // Never pretend to support seccomp with Valgrind, as it + // throws the tool off. + if (IsRunningOnValgrind()) { + return false; + } + + switch (level) { + case SeccompLevel::SINGLE_THREADED: + return KernelSupportsSeccompBPF(); + case SeccompLevel::MULTI_THREADED: + return KernelSupportsSeccompTsync(); + } + NOTREACHED(); + return false; +} + +bool SandboxBPF::StartSandbox(SeccompLevel seccomp_level) { + DCHECK(policy_); + CHECK(seccomp_level == SeccompLevel::SINGLE_THREADED || + seccomp_level == SeccompLevel::MULTI_THREADED); + + if (sandbox_has_started_) { + SANDBOX_DIE( + "Cannot repeatedly start sandbox. Create a separate Sandbox " + "object instead."); + return false; + } + + if (!proc_fd_.is_valid()) { + SetProcFd(ProcUtil::OpenProc()); + } + + const bool supports_tsync = KernelSupportsSeccompTsync(); + + if (seccomp_level == SeccompLevel::SINGLE_THREADED) { + // Wait for /proc/self/task/ to update if needed and assert the + // process is single threaded. + ThreadHelpers::AssertSingleThreaded(proc_fd_.get()); + } else if (seccomp_level == SeccompLevel::MULTI_THREADED) { + if (IsSingleThreaded(proc_fd_.get())) { + SANDBOX_DIE("Cannot start sandbox; " + "process may be single-threaded when reported as not"); + return false; + } + if (!supports_tsync) { + SANDBOX_DIE("Cannot start sandbox; kernel does not support synchronizing " + "filters for a threadgroup"); + return false; + } + } + + // We no longer need access to any files in /proc. We want to do this + // before installing the filters, just in case that our policy denies + // close(). + if (proc_fd_.is_valid()) { + proc_fd_.reset(); + } + + // Install the filters. + InstallFilter(supports_tsync || + seccomp_level == SeccompLevel::MULTI_THREADED); + + return true; +} + +void SandboxBPF::SetProcFd(base::ScopedFD proc_fd) { + proc_fd_.swap(proc_fd); +} + +// static +bool SandboxBPF::IsValidSyscallNumber(int sysnum) { + return SyscallSet::IsValid(sysnum); +} + +// static +bool SandboxBPF::IsRequiredForUnsafeTrap(int sysno) { + return bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap(sysno); +} + +// static +intptr_t SandboxBPF::ForwardSyscall(const struct arch_seccomp_data& args) { + return Syscall::Call( + args.nr, static_cast<intptr_t>(args.args[0]), + static_cast<intptr_t>(args.args[1]), static_cast<intptr_t>(args.args[2]), + static_cast<intptr_t>(args.args[3]), static_cast<intptr_t>(args.args[4]), + static_cast<intptr_t>(args.args[5])); +} + +scoped_ptr<CodeGen::Program> SandboxBPF::AssembleFilter( + bool force_verification) { +#if !defined(NDEBUG) + force_verification = true; +#endif + DCHECK(policy_); + + bpf_dsl::PolicyCompiler compiler(policy_.get(), Trap::Registry()); + if (Trap::SandboxDebuggingAllowedByUser()) { + compiler.DangerousSetEscapePC(EscapePC()); + } + return compiler.Compile(force_verification); +} + +void SandboxBPF::InstallFilter(bool must_sync_threads) { + // We want to be very careful in not imposing any requirements on the + // policies that are set with SetSandboxPolicy(). This means, as soon as + // the sandbox is active, we shouldn't be relying on libraries that could + // be making system calls. This, for example, means we should avoid + // using the heap and we should avoid using STL functions. + // Temporarily copy the contents of the "program" vector into a + // stack-allocated array; and then explicitly destroy that object. + // This makes sure we don't ex- or implicitly call new/delete after we + // installed the BPF filter program in the kernel. Depending on the + // system memory allocator that is in effect, these operators can result + // in system calls to things like munmap() or brk(). + CodeGen::Program* program = AssembleFilter(false).release(); + + struct sock_filter bpf[program->size()]; + const struct sock_fprog prog = {static_cast<unsigned short>(program->size()), + bpf}; + memcpy(bpf, &(*program)[0], sizeof(bpf)); + delete program; + + // Make an attempt to release memory that is no longer needed here, rather + // than in the destructor. Try to avoid as much as possible to presume of + // what will be possible to do in the new (sandboxed) execution environment. + policy_.reset(); + + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + SANDBOX_DIE("Kernel refuses to enable no-new-privs"); + } + + // Install BPF filter program. If the thread state indicates multi-threading + // support, then the kernel hass the seccomp system call. Otherwise, fall + // back on prctl, which requires the process to be single-threaded. + if (must_sync_threads) { + int rv = + sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &prog); + if (rv) { + SANDBOX_DIE( + "Kernel refuses to turn on and synchronize threads for BPF filters"); + } + } else { + if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { + SANDBOX_DIE("Kernel refuses to turn on BPF filters"); + } + } + + sandbox_has_started_ = true; +} + +} // namespace sandbox diff --git a/sandbox/linux/seccomp-bpf/sandbox_bpf.h b/sandbox/linux/seccomp-bpf/sandbox_bpf.h new file mode 100644 index 0000000000..96cceb5648 --- /dev/null +++ b/sandbox/linux/seccomp-bpf/sandbox_bpf.h @@ -0,0 +1,118 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SECCOMP_BPF_SANDBOX_BPF_H_ +#define SANDBOX_LINUX_SECCOMP_BPF_SANDBOX_BPF_H_ + +#include <stdint.h> + +#include "base/files/scoped_file.h" +#include "base/macros.h" +#include "base/memory/scoped_ptr.h" +#include "sandbox/linux/bpf_dsl/codegen.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { +struct arch_seccomp_data; +namespace bpf_dsl { +class Policy; +} + +// This class can be used to apply a syscall sandboxing policy expressed in a +// bpf_dsl::Policy object to the current process. +// Syscall sandboxing policies get inherited by subprocesses and, once applied, +// can never be removed for the lifetime of the process. +class SANDBOX_EXPORT SandboxBPF { + public: + enum class SeccompLevel { + SINGLE_THREADED, + MULTI_THREADED, + }; + + // Ownership of |policy| is transfered here to the sandbox object. + // nullptr is allowed for unit tests. + explicit SandboxBPF(bpf_dsl::Policy* policy); + // NOTE: Setting a policy and starting the sandbox is a one-way operation. + // The kernel does not provide any option for unloading a loaded sandbox. The + // sandbox remains engaged even when the object is destructed. + ~SandboxBPF(); + + // Detect if the kernel supports the specified seccomp level. + // See StartSandbox() for a description of these. + static bool SupportsSeccompSandbox(SeccompLevel level); + + // This is the main public entry point. It sets up the resources needed by + // the sandbox, and enters Seccomp mode. + // The calling process must provide a |level| to tell the sandbox which type + // of kernel support it should engage. + // SINGLE_THREADED will only sandbox the calling thread. Since it would be a + // security risk, the sandbox will also check that the current process is + // single threaded and crash if it isn't the case. + // MULTI_THREADED requires more recent kernel support and allows to sandbox + // all the threads of the current process. Be mindful of potential races, + // with other threads using disallowed system calls either before or after + // the sandbox is engaged. + // + // It is possible to stack multiple sandboxes by creating separate "Sandbox" + // objects and calling "StartSandbox()" on each of them. Please note, that + // this requires special care, though, as newly stacked sandboxes can never + // relax restrictions imposed by earlier sandboxes. Furthermore, installing + // a new policy requires making system calls, that might already be + // disallowed. + // Finally, stacking does add more kernel overhead than having a single + // combined policy. So, it should only be used if there are no alternatives. + bool StartSandbox(SeccompLevel level) WARN_UNUSED_RESULT; + + // The sandbox needs to be able to access files in "/proc/self/". If + // this directory is not accessible when "StartSandbox()" gets called, the + // caller must provide an already opened file descriptor by calling + // "SetProcFd()". + // The sandbox becomes the new owner of this file descriptor and will + // close it when "StartSandbox()" executes or when the sandbox object + // disappears. + void SetProcFd(base::ScopedFD proc_fd); + + // Checks whether a particular system call number is valid on the current + // architecture. + static bool IsValidSyscallNumber(int sysnum); + + // UnsafeTraps require some syscalls to always be allowed. + // This helper function returns true for these calls. + static bool IsRequiredForUnsafeTrap(int sysno); + + // From within an UnsafeTrap() it is often useful to be able to execute + // the system call that triggered the trap. The ForwardSyscall() method + // makes this easy. It is more efficient than calling glibc's syscall() + // function, as it avoid the extra round-trip to the signal handler. And + // it automatically does the correct thing to report kernel-style error + // conditions, rather than setting errno. See the comments for TrapFnc for + // details. In other words, the return value from ForwardSyscall() is + // directly suitable as a return value for a trap handler. + static intptr_t ForwardSyscall(const struct arch_seccomp_data& args); + + // Assembles a BPF filter program from the current policy. After calling this + // function, you must not call any other sandboxing function. + // Typically, AssembleFilter() is only used by unit tests and by sandbox + // internals. It should not be used by production code. + // For performance reasons, we normally only run the assembled BPF program + // through the verifier, iff the program was built in debug mode. + // But by setting "force_verification", the caller can request that the + // verifier is run unconditionally. This is useful for unittests. + scoped_ptr<CodeGen::Program> AssembleFilter(bool force_verification); + + private: + // Assembles and installs a filter based on the policy that has previously + // been configured with SetSandboxPolicy(). + void InstallFilter(bool must_sync_threads); + + base::ScopedFD proc_fd_; + bool sandbox_has_started_; + scoped_ptr<bpf_dsl::Policy> policy_; + + DISALLOW_COPY_AND_ASSIGN(SandboxBPF); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SECCOMP_BPF_SANDBOX_BPF_H_ diff --git a/sandbox/linux/seccomp-bpf/syscall.cc b/sandbox/linux/seccomp-bpf/syscall.cc new file mode 100644 index 0000000000..bc6461f117 --- /dev/null +++ b/sandbox/linux/seccomp-bpf/syscall.cc @@ -0,0 +1,421 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/seccomp-bpf/syscall.h" + +#include <errno.h> +#include <stdint.h> + +#include "base/logging.h" +#include "sandbox/linux/bpf_dsl/seccomp_macros.h" + +namespace sandbox { + +namespace { + +#if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \ + defined(ARCH_CPU_MIPS_FAMILY) +// Number that's not currently used by any Linux kernel ABIs. +const int kInvalidSyscallNumber = 0x351d3; +#else +#error Unrecognized architecture +#endif + +asm(// We need to be able to tell the kernel exactly where we made a + // system call. The C++ compiler likes to sometimes clone or + // inline code, which would inadvertently end up duplicating + // the entry point. + // "gcc" can suppress code duplication with suitable function + // attributes, but "clang" doesn't have this ability. + // The "clang" developer mailing list suggested that the correct + // and portable solution is a file-scope assembly block. + // N.B. We do mark our code as a proper function so that backtraces + // work correctly. But we make absolutely no attempt to use the + // ABI's calling conventions for passing arguments. We will only + // ever be called from assembly code and thus can pick more + // suitable calling conventions. +#if defined(__i386__) + ".text\n" + ".align 16, 0x90\n" + ".type SyscallAsm, @function\n" + "SyscallAsm:.cfi_startproc\n" + // Check if "%eax" is negative. If so, do not attempt to make a + // system call. Instead, compute the return address that is visible + // to the kernel after we execute "int $0x80". This address can be + // used as a marker that BPF code inspects. + "test %eax, %eax\n" + "jge 1f\n" + // Always, make sure that our code is position-independent, or + // address space randomization might not work on i386. This means, + // we can't use "lea", but instead have to rely on "call/pop". + "call 0f; .cfi_adjust_cfa_offset 4\n" + "0:pop %eax; .cfi_adjust_cfa_offset -4\n" + "addl $2f-0b, %eax\n" + "ret\n" + // Save register that we don't want to clobber. On i386, we need to + // save relatively aggressively, as there are a couple or registers + // that are used internally (e.g. %ebx for position-independent + // code, and %ebp for the frame pointer), and as we need to keep at + // least a few registers available for the register allocator. + "1:push %esi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset esi, 0\n" + "push %edi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset edi, 0\n" + "push %ebx; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebx, 0\n" + "push %ebp; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebp, 0\n" + // Copy entries from the array holding the arguments into the + // correct CPU registers. + "movl 0(%edi), %ebx\n" + "movl 4(%edi), %ecx\n" + "movl 8(%edi), %edx\n" + "movl 12(%edi), %esi\n" + "movl 20(%edi), %ebp\n" + "movl 16(%edi), %edi\n" + // Enter the kernel. + "int $0x80\n" + // This is our "magic" return address that the BPF filter sees. + "2:" + // Restore any clobbered registers that we didn't declare to the + // compiler. + "pop %ebp; .cfi_restore ebp; .cfi_adjust_cfa_offset -4\n" + "pop %ebx; .cfi_restore ebx; .cfi_adjust_cfa_offset -4\n" + "pop %edi; .cfi_restore edi; .cfi_adjust_cfa_offset -4\n" + "pop %esi; .cfi_restore esi; .cfi_adjust_cfa_offset -4\n" + "ret\n" + ".cfi_endproc\n" + "9:.size SyscallAsm, 9b-SyscallAsm\n" +#elif defined(__x86_64__) + ".text\n" + ".align 16, 0x90\n" + ".type SyscallAsm, @function\n" + "SyscallAsm:.cfi_startproc\n" + // Check if "%rdi" is negative. If so, do not attempt to make a + // system call. Instead, compute the return address that is visible + // to the kernel after we execute "syscall". This address can be + // used as a marker that BPF code inspects. + "test %rdi, %rdi\n" + "jge 1f\n" + // Always make sure that our code is position-independent, or the + // linker will throw a hissy fit on x86-64. + "lea 2f(%rip), %rax\n" + "ret\n" + // Now we load the registers used to pass arguments to the system + // call: system call number in %rax, and arguments in %rdi, %rsi, + // %rdx, %r10, %r8, %r9. Note: These are all caller-save registers + // (only %rbx, %rbp, %rsp, and %r12-%r15 are callee-save), so no + // need to worry here about spilling registers or CFI directives. + "1:movq %rdi, %rax\n" + "movq 0(%rsi), %rdi\n" + "movq 16(%rsi), %rdx\n" + "movq 24(%rsi), %r10\n" + "movq 32(%rsi), %r8\n" + "movq 40(%rsi), %r9\n" + "movq 8(%rsi), %rsi\n" + // Enter the kernel. + "syscall\n" + // This is our "magic" return address that the BPF filter sees. + "2:ret\n" + ".cfi_endproc\n" + "9:.size SyscallAsm, 9b-SyscallAsm\n" +#elif defined(__arm__) + // Throughout this file, we use the same mode (ARM vs. thumb) + // that the C++ compiler uses. This means, when transfering control + // from C++ to assembly code, we do not need to switch modes (e.g. + // by using the "bx" instruction). It also means that our assembly + // code should not be invoked directly from code that lives in + // other compilation units, as we don't bother implementing thumb + // interworking. That's OK, as we don't make any of the assembly + // symbols public. They are all local to this file. + ".text\n" + ".align 2\n" + ".type SyscallAsm, %function\n" +#if defined(__thumb__) + ".thumb_func\n" +#else + ".arm\n" +#endif + "SyscallAsm:\n" +#if !defined(__native_client_nonsfi__) + // .fnstart and .fnend pseudo operations creates unwind table. + // It also creates a reference to the symbol __aeabi_unwind_cpp_pr0, which + // is not provided by PNaCl toolchain. Disable it. + ".fnstart\n" +#endif + "@ args = 0, pretend = 0, frame = 8\n" + "@ frame_needed = 1, uses_anonymous_args = 0\n" +#if defined(__thumb__) + ".cfi_startproc\n" + "push {r7, lr}\n" + ".save {r7, lr}\n" + ".cfi_offset 14, -4\n" + ".cfi_offset 7, -8\n" + ".cfi_def_cfa_offset 8\n" +#else + "stmfd sp!, {fp, lr}\n" + "add fp, sp, #4\n" +#endif + // Check if "r0" is negative. If so, do not attempt to make a + // system call. Instead, compute the return address that is visible + // to the kernel after we execute "swi 0". This address can be + // used as a marker that BPF code inspects. + "cmp r0, #0\n" + "bge 1f\n" + "adr r0, 2f\n" + "b 2f\n" + // We declared (almost) all clobbered registers to the compiler. On + // ARM there is no particular register pressure. So, we can go + // ahead and directly copy the entries from the arguments array + // into the appropriate CPU registers. + "1:ldr r5, [r6, #20]\n" + "ldr r4, [r6, #16]\n" + "ldr r3, [r6, #12]\n" + "ldr r2, [r6, #8]\n" + "ldr r1, [r6, #4]\n" + "mov r7, r0\n" + "ldr r0, [r6, #0]\n" + // Enter the kernel + "swi 0\n" +// Restore the frame pointer. Also restore the program counter from +// the link register; this makes us return to the caller. +#if defined(__thumb__) + "2:pop {r7, pc}\n" + ".cfi_endproc\n" +#else + "2:ldmfd sp!, {fp, pc}\n" +#endif +#if !defined(__native_client_nonsfi__) + // Do not use .fnstart and .fnend for PNaCl toolchain. See above comment, + // for more details. + ".fnend\n" +#endif + "9:.size SyscallAsm, 9b-SyscallAsm\n" +#elif defined(__mips__) + ".text\n" + ".align 4\n" + ".type SyscallAsm, @function\n" + "SyscallAsm:.ent SyscallAsm\n" + ".frame $sp, 40, $ra\n" + ".set push\n" + ".set noreorder\n" + "addiu $sp, $sp, -40\n" + "sw $ra, 36($sp)\n" + // Check if "v0" is negative. If so, do not attempt to make a + // system call. Instead, compute the return address that is visible + // to the kernel after we execute "syscall". This address can be + // used as a marker that BPF code inspects. + "bgez $v0, 1f\n" + " nop\n" + "la $v0, 2f\n" + "b 2f\n" + " nop\n" + // On MIPS first four arguments go to registers a0 - a3 and any + // argument after that goes to stack. We can go ahead and directly + // copy the entries from the arguments array into the appropriate + // CPU registers and on the stack. + "1:lw $a3, 28($a0)\n" + "lw $a2, 24($a0)\n" + "lw $a1, 20($a0)\n" + "lw $t0, 16($a0)\n" + "sw $a3, 28($sp)\n" + "sw $a2, 24($sp)\n" + "sw $a1, 20($sp)\n" + "sw $t0, 16($sp)\n" + "lw $a3, 12($a0)\n" + "lw $a2, 8($a0)\n" + "lw $a1, 4($a0)\n" + "lw $a0, 0($a0)\n" + // Enter the kernel + "syscall\n" + // This is our "magic" return address that the BPF filter sees. + // Restore the return address from the stack. + "2:lw $ra, 36($sp)\n" + "jr $ra\n" + " addiu $sp, $sp, 40\n" + ".set pop\n" + ".end SyscallAsm\n" + ".size SyscallAsm,.-SyscallAsm\n" +#elif defined(__aarch64__) + ".text\n" + ".align 2\n" + ".type SyscallAsm, %function\n" + "SyscallAsm:\n" + ".cfi_startproc\n" + "cmp x0, #0\n" + "b.ge 1f\n" + "adr x0,2f\n" + "b 2f\n" + "1:ldr x5, [x6, #40]\n" + "ldr x4, [x6, #32]\n" + "ldr x3, [x6, #24]\n" + "ldr x2, [x6, #16]\n" + "ldr x1, [x6, #8]\n" + "mov x8, x0\n" + "ldr x0, [x6, #0]\n" + // Enter the kernel + "svc 0\n" + "2:ret\n" + ".cfi_endproc\n" + ".size SyscallAsm, .-SyscallAsm\n" +#endif + ); // asm + +#if defined(__x86_64__) +extern "C" { +intptr_t SyscallAsm(intptr_t nr, const intptr_t args[6]); +} +#endif + +} // namespace + +intptr_t Syscall::InvalidCall() { + // Explicitly pass eight zero arguments just in case. + return Call(kInvalidSyscallNumber, 0, 0, 0, 0, 0, 0, 0, 0); +} + +intptr_t Syscall::Call(int nr, + intptr_t p0, + intptr_t p1, + intptr_t p2, + intptr_t p3, + intptr_t p4, + intptr_t p5, + intptr_t p6, + intptr_t p7) { + // We rely on "intptr_t" to be the exact size as a "void *". This is + // typically true, but just in case, we add a check. The language + // specification allows platforms some leeway in cases, where + // "sizeof(void *)" is not the same as "sizeof(void (*)())". We expect + // that this would only be an issue for IA64, which we are currently not + // planning on supporting. And it is even possible that this would work + // on IA64, but for lack of actual hardware, I cannot test. + static_assert(sizeof(void*) == sizeof(intptr_t), + "pointer types and intptr_t must be exactly the same size"); + + // TODO(nedeljko): Enable use of more than six parameters on architectures + // where that makes sense. +#if defined(__mips__) + const intptr_t args[8] = {p0, p1, p2, p3, p4, p5, p6, p7}; +#else + DCHECK_EQ(p6, 0) << " Support for syscalls with more than six arguments not " + "added for this architecture"; + DCHECK_EQ(p7, 0) << " Support for syscalls with more than six arguments not " + "added for this architecture"; + const intptr_t args[6] = {p0, p1, p2, p3, p4, p5}; +#endif // defined(__mips__) + +// Invoke our file-scope assembly code. The constraints have been picked +// carefully to match what the rest of the assembly code expects in input, +// output, and clobbered registers. +#if defined(__i386__) + intptr_t ret = nr; + asm volatile( + "call SyscallAsm\n" + // N.B. These are not the calling conventions normally used by the ABI. + : "=a"(ret) + : "0"(ret), "D"(args) + : "cc", "esp", "memory", "ecx", "edx"); +#elif defined(__x86_64__) + intptr_t ret = SyscallAsm(nr, args); +#elif defined(__arm__) + intptr_t ret; + { + register intptr_t inout __asm__("r0") = nr; + register const intptr_t* data __asm__("r6") = args; + asm volatile( + "bl SyscallAsm\n" + // N.B. These are not the calling conventions normally used by the ABI. + : "=r"(inout) + : "0"(inout), "r"(data) + : "cc", + "lr", + "memory", + "r1", + "r2", + "r3", + "r4", + "r5" +#if !defined(__thumb__) + // In thumb mode, we cannot use "r7" as a general purpose register, as + // it is our frame pointer. We have to manually manage and preserve + // it. + // In ARM mode, we have a dedicated frame pointer register and "r7" is + // thus available as a general purpose register. We don't preserve it, + // but instead mark it as clobbered. + , + "r7" +#endif // !defined(__thumb__) + ); + ret = inout; + } +#elif defined(__mips__) + int err_status; + intptr_t ret = Syscall::SandboxSyscallRaw(nr, args, &err_status); + + if (err_status) { + // On error, MIPS returns errno from syscall instead of -errno. + // The purpose of this negation is for SandboxSyscall() to behave + // more like it would on other architectures. + ret = -ret; + } +#elif defined(__aarch64__) + intptr_t ret; + { + register intptr_t inout __asm__("x0") = nr; + register const intptr_t* data __asm__("x6") = args; + asm volatile("bl SyscallAsm\n" + : "=r"(inout) + : "0"(inout), "r"(data) + : "memory", "x1", "x2", "x3", "x4", "x5", "x8", "x30"); + ret = inout; + } + +#else +#error "Unimplemented architecture" +#endif + return ret; +} + +void Syscall::PutValueInUcontext(intptr_t ret_val, ucontext_t* ctx) { +#if defined(__mips__) + // Mips ABI states that on error a3 CPU register has non zero value and if + // there is no error, it should be zero. + if (ret_val <= -1 && ret_val >= -4095) { + // |ret_val| followes the Syscall::Call() convention of being -errno on + // errors. In order to write correct value to return register this sign + // needs to be changed back. + ret_val = -ret_val; + SECCOMP_PARM4(ctx) = 1; + } else + SECCOMP_PARM4(ctx) = 0; +#endif + SECCOMP_RESULT(ctx) = static_cast<greg_t>(ret_val); +} + +#if defined(__mips__) +intptr_t Syscall::SandboxSyscallRaw(int nr, + const intptr_t* args, + intptr_t* err_ret) { + register intptr_t ret __asm__("v0") = nr; + // a3 register becomes non zero on error. + register intptr_t err_stat __asm__("a3") = 0; + { + register const intptr_t* data __asm__("a0") = args; + asm volatile( + "la $t9, SyscallAsm\n" + "jalr $t9\n" + " nop\n" + : "=r"(ret), "=r"(err_stat) + : "0"(ret), + "r"(data) + // a2 is in the clober list so inline assembly can not change its + // value. + : "memory", "ra", "t9", "a2"); + } + + // Set an error status so it can be used outside of this function + *err_ret = err_stat; + + return ret; +} +#endif // defined(__mips__) + +} // namespace sandbox diff --git a/sandbox/linux/seccomp-bpf/syscall.h b/sandbox/linux/seccomp-bpf/syscall.h new file mode 100644 index 0000000000..ccfc88dcb3 --- /dev/null +++ b/sandbox/linux/seccomp-bpf/syscall.h @@ -0,0 +1,166 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_H__ +#define SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_H__ + +#include <signal.h> +#include <stdint.h> + +#include "base/macros.h" +#include "sandbox/linux/system_headers/linux_signal.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { + +// This purely static class can be used to perform system calls with some +// low-level control. +class SANDBOX_EXPORT Syscall { + public: + // InvalidCall() invokes Call() with a platform-appropriate syscall + // number that is guaranteed to not be implemented (i.e., normally + // returns -ENOSYS). + // This is primarily meant to be useful for writing sandbox policy + // unit tests. + static intptr_t InvalidCall(); + + // System calls can take up to six parameters (up to eight on some + // architectures). Traditionally, glibc + // implements this property by using variadic argument lists. This works, but + // confuses modern tools such as valgrind, because we are nominally passing + // uninitialized data whenever we call through this function and pass less + // than the full six arguments. + // So, instead, we use C++'s template system to achieve a very similar + // effect. C++ automatically sets the unused parameters to zero for us, and + // it also does the correct type expansion (e.g. from 32bit to 64bit) where + // necessary. + // We have to use C-style cast operators as we want to be able to accept both + // integer and pointer types. + template <class T0, + class T1, + class T2, + class T3, + class T4, + class T5, + class T6, + class T7> + static inline intptr_t + Call(int nr, T0 p0, T1 p1, T2 p2, T3 p3, T4 p4, T5 p5, T6 p6, T7 p7) { + return Call(nr, + (intptr_t)p0, + (intptr_t)p1, + (intptr_t)p2, + (intptr_t)p3, + (intptr_t)p4, + (intptr_t)p5, + (intptr_t)p6, + (intptr_t)p7); + } + + template <class T0, + class T1, + class T2, + class T3, + class T4, + class T5, + class T6> + static inline intptr_t + Call(int nr, T0 p0, T1 p1, T2 p2, T3 p3, T4 p4, T5 p5, T6 p6) { + return Call(nr, + (intptr_t)p0, + (intptr_t)p1, + (intptr_t)p2, + (intptr_t)p3, + (intptr_t)p4, + (intptr_t)p5, + (intptr_t)p6, + 0); + } + + template <class T0, class T1, class T2, class T3, class T4, class T5> + static inline intptr_t + Call(int nr, T0 p0, T1 p1, T2 p2, T3 p3, T4 p4, T5 p5) { + return Call(nr, + (intptr_t)p0, + (intptr_t)p1, + (intptr_t)p2, + (intptr_t)p3, + (intptr_t)p4, + (intptr_t)p5, + 0, + 0); + } + + template <class T0, class T1, class T2, class T3, class T4> + static inline intptr_t Call(int nr, T0 p0, T1 p1, T2 p2, T3 p3, T4 p4) { + return Call(nr, p0, p1, p2, p3, p4, 0, 0, 0); + } + + template <class T0, class T1, class T2, class T3> + static inline intptr_t Call(int nr, T0 p0, T1 p1, T2 p2, T3 p3) { + return Call(nr, p0, p1, p2, p3, 0, 0, 0, 0); + } + + template <class T0, class T1, class T2> + static inline intptr_t Call(int nr, T0 p0, T1 p1, T2 p2) { + return Call(nr, p0, p1, p2, 0, 0, 0, 0, 0); + } + + template <class T0, class T1> + static inline intptr_t Call(int nr, T0 p0, T1 p1) { + return Call(nr, p0, p1, 0, 0, 0, 0, 0, 0); + } + + template <class T0> + static inline intptr_t Call(int nr, T0 p0) { + return Call(nr, p0, 0, 0, 0, 0, 0, 0, 0); + } + + static inline intptr_t Call(int nr) { + return Call(nr, 0, 0, 0, 0, 0, 0, 0, 0); + } + + // Set the registers in |ctx| to match what they would be after a system call + // returning |ret_val|. |ret_val| must follow the Syscall::Call() convention + // of being -errno on errors. + static void PutValueInUcontext(intptr_t ret_val, ucontext_t* ctx); + + private: + // This performs system call |nr| with the arguments p0 to p7 from a constant + // userland address, which is for instance observable by seccomp-bpf filters. + // The constant userland address from which these system calls are made will + // be returned if |nr| is passed as -1. + // On error, this function will return a value between -1 and -4095 which + // should be interpreted as -errno. + static intptr_t Call(int nr, + intptr_t p0, + intptr_t p1, + intptr_t p2, + intptr_t p3, + intptr_t p4, + intptr_t p5, + intptr_t p6, + intptr_t p7); + +#if defined(__mips__) + // This function basically does on MIPS what SandboxSyscall() is doing on + // other architectures. However, because of specificity of MIPS regarding + // handling syscall errors, SandboxSyscall() is made as a wrapper for this + // function in order for SandboxSyscall() to behave more like on other + // architectures on places where return value from SandboxSyscall() is used + // directly (like in most tests). + // The syscall "nr" is called with arguments that are set in an array on which + // pointer "args" points to and an information weather there is an error or no + // is returned to SandboxSyscall() by err_stat. + static intptr_t SandboxSyscallRaw(int nr, + const intptr_t* args, + intptr_t* err_stat); +#endif // defined(__mips__) + + DISALLOW_IMPLICIT_CONSTRUCTORS(Syscall); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SECCOMP_BPF_SYSCALL_H__ diff --git a/sandbox/linux/seccomp-bpf/syscall_unittest.cc b/sandbox/linux/seccomp-bpf/syscall_unittest.cc new file mode 100644 index 0000000000..5fdee6c495 --- /dev/null +++ b/sandbox/linux/seccomp-bpf/syscall_unittest.cc @@ -0,0 +1,240 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/seccomp-bpf/syscall.h" + +#include <asm/unistd.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include <vector> + +#include "base/posix/eintr_wrapper.h" +#include "build/build_config.h" +#include "sandbox/linux/bpf_dsl/bpf_dsl.h" +#include "sandbox/linux/bpf_dsl/policy.h" +#include "sandbox/linux/seccomp-bpf/bpf_tests.h" +#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" +#include "sandbox/linux/tests/unit_tests.h" +#include "testing/gtest/include/gtest/gtest.h" + +using sandbox::bpf_dsl::Allow; +using sandbox::bpf_dsl::ResultExpr; +using sandbox::bpf_dsl::Trap; + +namespace sandbox { + +namespace { + +// Different platforms use different symbols for the six-argument version +// of the mmap() system call. Test for the correct symbol at compile time. +#ifdef __NR_mmap2 +const int kMMapNr = __NR_mmap2; +#else +const int kMMapNr = __NR_mmap; +#endif + +TEST(Syscall, InvalidCallReturnsENOSYS) { + EXPECT_EQ(-ENOSYS, Syscall::InvalidCall()); +} + +TEST(Syscall, WellKnownEntryPoint) { +// Test that Syscall::Call(-1) is handled specially. Don't do this on ARM, +// where syscall(-1) crashes with SIGILL. Not running the test is fine, as we +// are still testing ARM code in the next set of tests. +#if !defined(__arm__) && !defined(__aarch64__) + EXPECT_NE(Syscall::Call(-1), syscall(-1)); +#endif + +// If possible, test that Syscall::Call(-1) returns the address right +// after +// a kernel entry point. +#if defined(__i386__) + EXPECT_EQ(0x80CDu, ((uint16_t*)Syscall::Call(-1))[-1]); // INT 0x80 +#elif defined(__x86_64__) + EXPECT_EQ(0x050Fu, ((uint16_t*)Syscall::Call(-1))[-1]); // SYSCALL +#elif defined(__arm__) +#if defined(__thumb__) + EXPECT_EQ(0xDF00u, ((uint16_t*)Syscall::Call(-1))[-1]); // SWI 0 +#else + EXPECT_EQ(0xEF000000u, ((uint32_t*)Syscall::Call(-1))[-1]); // SVC 0 +#endif +#elif defined(__mips__) + // Opcode for MIPS sycall is in the lower 16-bits + EXPECT_EQ(0x0cu, (((uint32_t*)Syscall::Call(-1))[-1]) & 0x0000FFFF); +#elif defined(__aarch64__) + EXPECT_EQ(0xD4000001u, ((uint32_t*)Syscall::Call(-1))[-1]); // SVC 0 +#else +#warning Incomplete test case; need port for target platform +#endif +} + +TEST(Syscall, TrivialSyscallNoArgs) { + // Test that we can do basic system calls + EXPECT_EQ(Syscall::Call(__NR_getpid), syscall(__NR_getpid)); +} + +TEST(Syscall, TrivialSyscallOneArg) { + int new_fd; + // Duplicate standard error and close it. + ASSERT_GE(new_fd = Syscall::Call(__NR_dup, 2), 0); + int close_return_value = IGNORE_EINTR(Syscall::Call(__NR_close, new_fd)); + ASSERT_EQ(close_return_value, 0); +} + +TEST(Syscall, TrivialFailingSyscall) { + errno = -42; + int ret = Syscall::Call(__NR_dup, -1); + ASSERT_EQ(-EBADF, ret); + // Verify that Syscall::Call does not touch errno. + ASSERT_EQ(-42, errno); +} + +// SIGSYS trap handler that will be called on __NR_uname. +intptr_t CopySyscallArgsToAux(const struct arch_seccomp_data& args, void* aux) { + // |aux| is our BPF_AUX pointer. + std::vector<uint64_t>* const seen_syscall_args = + static_cast<std::vector<uint64_t>*>(aux); + BPF_ASSERT(arraysize(args.args) == 6); + seen_syscall_args->assign(args.args, args.args + arraysize(args.args)); + return -ENOMEM; +} + +class CopyAllArgsOnUnamePolicy : public bpf_dsl::Policy { + public: + explicit CopyAllArgsOnUnamePolicy(std::vector<uint64_t>* aux) : aux_(aux) {} + ~CopyAllArgsOnUnamePolicy() override {} + + ResultExpr EvaluateSyscall(int sysno) const override { + DCHECK(SandboxBPF::IsValidSyscallNumber(sysno)); + if (sysno == __NR_uname) { + return Trap(CopySyscallArgsToAux, aux_); + } else { + return Allow(); + } + } + + private: + std::vector<uint64_t>* aux_; + + DISALLOW_COPY_AND_ASSIGN(CopyAllArgsOnUnamePolicy); +}; + +// We are testing Syscall::Call() by making use of a BPF filter that +// allows us +// to inspect the system call arguments that the kernel saw. +BPF_TEST(Syscall, + SyntheticSixArgs, + CopyAllArgsOnUnamePolicy, + std::vector<uint64_t> /* (*BPF_AUX) */) { + const int kExpectedValue = 42; + // In this test we only pass integers to the kernel. We might want to make + // additional tests to try other types. What we will see depends on + // implementation details of kernel BPF filters and we will need to document + // the expected behavior very clearly. + int syscall_args[6]; + for (size_t i = 0; i < arraysize(syscall_args); ++i) { + syscall_args[i] = kExpectedValue + i; + } + + // We could use pretty much any system call we don't need here. uname() is + // nice because it doesn't have any dangerous side effects. + BPF_ASSERT(Syscall::Call(__NR_uname, + syscall_args[0], + syscall_args[1], + syscall_args[2], + syscall_args[3], + syscall_args[4], + syscall_args[5]) == -ENOMEM); + + // We expect the trap handler to have copied the 6 arguments. + BPF_ASSERT(BPF_AUX->size() == 6); + + // Don't loop here so that we can see which argument does cause the failure + // easily from the failing line. + // uint64_t is the type passed to our SIGSYS handler. + BPF_ASSERT((*BPF_AUX)[0] == static_cast<uint64_t>(syscall_args[0])); + BPF_ASSERT((*BPF_AUX)[1] == static_cast<uint64_t>(syscall_args[1])); + BPF_ASSERT((*BPF_AUX)[2] == static_cast<uint64_t>(syscall_args[2])); + BPF_ASSERT((*BPF_AUX)[3] == static_cast<uint64_t>(syscall_args[3])); + BPF_ASSERT((*BPF_AUX)[4] == static_cast<uint64_t>(syscall_args[4])); + BPF_ASSERT((*BPF_AUX)[5] == static_cast<uint64_t>(syscall_args[5])); +} + +TEST(Syscall, ComplexSyscallSixArgs) { + int fd; + ASSERT_LE(0, + fd = Syscall::Call(__NR_openat, AT_FDCWD, "/dev/null", O_RDWR, 0L)); + + // Use mmap() to allocate some read-only memory + char* addr0; + ASSERT_NE( + (char*)NULL, + addr0 = reinterpret_cast<char*>(Syscall::Call(kMMapNr, + (void*)NULL, + 4096, + PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, + fd, + 0L))); + + // Try to replace the existing mapping with a read-write mapping + char* addr1; + ASSERT_EQ(addr0, + addr1 = reinterpret_cast<char*>( + Syscall::Call(kMMapNr, + addr0, + 4096L, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + fd, + 0L))); + ++*addr1; // This should not seg fault + + // Clean up + EXPECT_EQ(0, Syscall::Call(__NR_munmap, addr1, 4096L)); + EXPECT_EQ(0, IGNORE_EINTR(Syscall::Call(__NR_close, fd))); + + // Check that the offset argument (i.e. the sixth argument) is processed + // correctly. + ASSERT_GE( + fd = Syscall::Call(__NR_openat, AT_FDCWD, "/proc/self/exe", O_RDONLY, 0L), + 0); + char* addr2, *addr3; + ASSERT_NE((char*)NULL, + addr2 = reinterpret_cast<char*>(Syscall::Call( + kMMapNr, (void*)NULL, 8192L, PROT_READ, MAP_PRIVATE, fd, 0L))); + ASSERT_NE((char*)NULL, + addr3 = reinterpret_cast<char*>(Syscall::Call(kMMapNr, + (void*)NULL, + 4096L, + PROT_READ, + MAP_PRIVATE, + fd, +#if defined(__NR_mmap2) + 1L +#else + 4096L +#endif + ))); + EXPECT_EQ(0, memcmp(addr2 + 4096, addr3, 4096)); + + // Just to be absolutely on the safe side, also verify that the file + // contents matches what we are getting from a read() operation. + char buf[8192]; + EXPECT_EQ(8192, Syscall::Call(__NR_read, fd, buf, 8192L)); + EXPECT_EQ(0, memcmp(addr2, buf, 8192)); + + // Clean up + EXPECT_EQ(0, Syscall::Call(__NR_munmap, addr2, 8192L)); + EXPECT_EQ(0, Syscall::Call(__NR_munmap, addr3, 4096L)); + EXPECT_EQ(0, IGNORE_EINTR(Syscall::Call(__NR_close, fd))); +} + +} // namespace + +} // namespace sandbox diff --git a/sandbox/linux/seccomp-bpf/trap.cc b/sandbox/linux/seccomp-bpf/trap.cc new file mode 100644 index 0000000000..8f559e53b1 --- /dev/null +++ b/sandbox/linux/seccomp-bpf/trap.cc @@ -0,0 +1,390 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/seccomp-bpf/trap.h" + +#include <errno.h> +#include <signal.h> +#include <string.h> +#include <sys/syscall.h> + +#include <algorithm> +#include <limits> + +#include "base/compiler_specific.h" +#include "base/logging.h" +#include "build/build_config.h" +#include "sandbox/linux/bpf_dsl/seccomp_macros.h" +#include "sandbox/linux/seccomp-bpf/die.h" +#include "sandbox/linux/seccomp-bpf/syscall.h" +#include "sandbox/linux/services/syscall_wrappers.h" +#include "sandbox/linux/system_headers/linux_seccomp.h" +#include "sandbox/linux/system_headers/linux_signal.h" + +namespace { + +struct arch_sigsys { + void* ip; + int nr; + unsigned int arch; +}; + +const int kCapacityIncrement = 20; + +// Unsafe traps can only be turned on, if the user explicitly allowed them +// by setting the CHROME_SANDBOX_DEBUGGING environment variable. +const char kSandboxDebuggingEnv[] = "CHROME_SANDBOX_DEBUGGING"; + +// We need to tell whether we are performing a "normal" callback, or +// whether we were called recursively from within a UnsafeTrap() callback. +// This is a little tricky to do, because we need to somehow get access to +// per-thread data from within a signal context. Normal TLS storage is not +// safely accessible at this time. We could roll our own, but that involves +// a lot of complexity. Instead, we co-opt one bit in the signal mask. +// If BUS is blocked, we assume that we have been called recursively. +// There is a possibility for collision with other code that needs to do +// this, but in practice the risks are low. +// If SIGBUS turns out to be a problem, we could instead co-opt one of the +// realtime signals. There are plenty of them. Unfortunately, there is no +// way to mark a signal as allocated. So, the potential for collision is +// possibly even worse. +bool GetIsInSigHandler(const ucontext_t* ctx) { + // Note: on Android, sigismember does not take a pointer to const. + return sigismember(const_cast<sigset_t*>(&ctx->uc_sigmask), LINUX_SIGBUS); +} + +void SetIsInSigHandler() { + sigset_t mask; + if (sigemptyset(&mask) || sigaddset(&mask, LINUX_SIGBUS) || + sandbox::sys_sigprocmask(LINUX_SIG_BLOCK, &mask, NULL)) { + SANDBOX_DIE("Failed to block SIGBUS"); + } +} + +bool IsDefaultSignalAction(const struct sigaction& sa) { + if (sa.sa_flags & SA_SIGINFO || sa.sa_handler != SIG_DFL) { + return false; + } + return true; +} + +} // namespace + +namespace sandbox { + +Trap::Trap() + : trap_array_(NULL), + trap_array_size_(0), + trap_array_capacity_(0), + has_unsafe_traps_(false) { + // Set new SIGSYS handler + struct sigaction sa = {}; + // In some toolchain, sa_sigaction is not declared in struct sigaction. + // So, here cast the pointer to the sa_handler's type. This works because + // |sa_handler| and |sa_sigaction| shares the same memory. + sa.sa_handler = reinterpret_cast<void (*)(int)>(SigSysAction); + sa.sa_flags = LINUX_SA_SIGINFO | LINUX_SA_NODEFER; + struct sigaction old_sa = {}; + if (sys_sigaction(LINUX_SIGSYS, &sa, &old_sa) < 0) { + SANDBOX_DIE("Failed to configure SIGSYS handler"); + } + + if (!IsDefaultSignalAction(old_sa)) { + static const char kExistingSIGSYSMsg[] = + "Existing signal handler when trying to install SIGSYS. SIGSYS needs " + "to be reserved for seccomp-bpf."; + DLOG(FATAL) << kExistingSIGSYSMsg; + LOG(ERROR) << kExistingSIGSYSMsg; + } + + // Unmask SIGSYS + sigset_t mask; + if (sigemptyset(&mask) || sigaddset(&mask, LINUX_SIGSYS) || + sys_sigprocmask(LINUX_SIG_UNBLOCK, &mask, NULL)) { + SANDBOX_DIE("Failed to configure SIGSYS handler"); + } +} + +bpf_dsl::TrapRegistry* Trap::Registry() { + // Note: This class is not thread safe. It is the caller's responsibility + // to avoid race conditions. Normally, this is a non-issue as the sandbox + // can only be initialized if there are no other threads present. + // Also, this is not a normal singleton. Once created, the global trap + // object must never be destroyed again. + if (!global_trap_) { + global_trap_ = new Trap(); + if (!global_trap_) { + SANDBOX_DIE("Failed to allocate global trap handler"); + } + } + return global_trap_; +} + +void Trap::SigSysAction(int nr, LinuxSigInfo* info, void* void_context) { + if (info) { + MSAN_UNPOISON(info, sizeof(*info)); + } + + // Obtain the signal context. This, most notably, gives us access to + // all CPU registers at the time of the signal. + ucontext_t* ctx = reinterpret_cast<ucontext_t*>(void_context); + if (ctx) { + MSAN_UNPOISON(ctx, sizeof(*ctx)); + } + + if (!global_trap_) { + RAW_SANDBOX_DIE( + "This can't happen. Found no global singleton instance " + "for Trap() handling."); + } + global_trap_->SigSys(nr, info, ctx); +} + +void Trap::SigSys(int nr, LinuxSigInfo* info, ucontext_t* ctx) { + // Signal handlers should always preserve "errno". Otherwise, we could + // trigger really subtle bugs. + const int old_errno = errno; + + // Various sanity checks to make sure we actually received a signal + // triggered by a BPF filter. If something else triggered SIGSYS + // (e.g. kill()), there is really nothing we can do with this signal. + if (nr != LINUX_SIGSYS || info->si_code != SYS_SECCOMP || !ctx || + info->si_errno <= 0 || + static_cast<size_t>(info->si_errno) > trap_array_size_) { + // ATI drivers seem to send SIGSYS, so this cannot be FATAL. + // See crbug.com/178166. + // TODO(jln): add a DCHECK or move back to FATAL. + RAW_LOG(ERROR, "Unexpected SIGSYS received."); + errno = old_errno; + return; + } + + + // Obtain the siginfo information that is specific to SIGSYS. Unfortunately, + // most versions of glibc don't include this information in siginfo_t. So, + // we need to explicitly copy it into a arch_sigsys structure. + struct arch_sigsys sigsys; + memcpy(&sigsys, &info->_sifields, sizeof(sigsys)); + +#if defined(__mips__) + // When indirect syscall (syscall(__NR_foo, ...)) is made on Mips, the + // number in register SECCOMP_SYSCALL(ctx) is always __NR_syscall and the + // real number of a syscall (__NR_foo) is in SECCOMP_PARM1(ctx) + bool sigsys_nr_is_bad = sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx)) && + sigsys.nr != static_cast<int>(SECCOMP_PARM1(ctx)); +#else + bool sigsys_nr_is_bad = sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx)); +#endif + + // Some more sanity checks. + if (sigsys.ip != reinterpret_cast<void*>(SECCOMP_IP(ctx)) || + sigsys_nr_is_bad || sigsys.arch != SECCOMP_ARCH) { + // TODO(markus): + // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal + // safe and can lead to bugs. We should eventually implement a different + // logging and reporting mechanism that is safe to be called from + // the sigSys() handler. + RAW_SANDBOX_DIE("Sanity checks are failing after receiving SIGSYS."); + } + + intptr_t rc; + if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) { + errno = old_errno; + if (sigsys.nr == __NR_clone) { + RAW_SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler."); + } +#if defined(__mips__) + // Mips supports up to eight arguments for syscall. + // However, seccomp bpf can filter only up to six arguments, so using eight + // arguments has sense only when using UnsafeTrap() handler. + rc = Syscall::Call(SECCOMP_SYSCALL(ctx), + SECCOMP_PARM1(ctx), + SECCOMP_PARM2(ctx), + SECCOMP_PARM3(ctx), + SECCOMP_PARM4(ctx), + SECCOMP_PARM5(ctx), + SECCOMP_PARM6(ctx), + SECCOMP_PARM7(ctx), + SECCOMP_PARM8(ctx)); +#else + rc = Syscall::Call(SECCOMP_SYSCALL(ctx), + SECCOMP_PARM1(ctx), + SECCOMP_PARM2(ctx), + SECCOMP_PARM3(ctx), + SECCOMP_PARM4(ctx), + SECCOMP_PARM5(ctx), + SECCOMP_PARM6(ctx)); +#endif // defined(__mips__) + } else { + const TrapKey& trap = trap_array_[info->si_errno - 1]; + if (!trap.safe) { + SetIsInSigHandler(); + } + + // Copy the seccomp-specific data into a arch_seccomp_data structure. This + // is what we are showing to TrapFnc callbacks that the system call + // evaluator registered with the sandbox. + struct arch_seccomp_data data = { + static_cast<int>(SECCOMP_SYSCALL(ctx)), + SECCOMP_ARCH, + reinterpret_cast<uint64_t>(sigsys.ip), + {static_cast<uint64_t>(SECCOMP_PARM1(ctx)), + static_cast<uint64_t>(SECCOMP_PARM2(ctx)), + static_cast<uint64_t>(SECCOMP_PARM3(ctx)), + static_cast<uint64_t>(SECCOMP_PARM4(ctx)), + static_cast<uint64_t>(SECCOMP_PARM5(ctx)), + static_cast<uint64_t>(SECCOMP_PARM6(ctx))}}; + + // Now call the TrapFnc callback associated with this particular instance + // of SECCOMP_RET_TRAP. + rc = trap.fnc(data, const_cast<void*>(trap.aux)); + } + + // Update the CPU register that stores the return code of the system call + // that we just handled, and restore "errno" to the value that it had + // before entering the signal handler. + Syscall::PutValueInUcontext(rc, ctx); + errno = old_errno; + + return; +} + +bool Trap::TrapKey::operator<(const TrapKey& o) const { + if (fnc != o.fnc) { + return fnc < o.fnc; + } else if (aux != o.aux) { + return aux < o.aux; + } else { + return safe < o.safe; + } +} + +uint16_t Trap::Add(TrapFnc fnc, const void* aux, bool safe) { + if (!safe && !SandboxDebuggingAllowedByUser()) { + // Unless the user set the CHROME_SANDBOX_DEBUGGING environment variable, + // we never return an ErrorCode that is marked as "unsafe". This also + // means, the BPF compiler will never emit code that allow unsafe system + // calls to by-pass the filter (because they use the magic return address + // from Syscall::Call(-1)). + + // This SANDBOX_DIE() can optionally be removed. It won't break security, + // but it might make error messages from the BPF compiler a little harder + // to understand. Removing the SANDBOX_DIE() allows callers to easily check + // whether unsafe traps are supported (by checking whether the returned + // ErrorCode is ET_INVALID). + SANDBOX_DIE( + "Cannot use unsafe traps unless CHROME_SANDBOX_DEBUGGING " + "is enabled"); + + return 0; + } + + // Each unique pair of TrapFnc and auxiliary data make up a distinct instance + // of a SECCOMP_RET_TRAP. + TrapKey key(fnc, aux, safe); + + // We return unique identifiers together with SECCOMP_RET_TRAP. This allows + // us to associate trap with the appropriate handler. The kernel allows us + // identifiers in the range from 0 to SECCOMP_RET_DATA (0xFFFF). We want to + // avoid 0, as it could be confused for a trap without any specific id. + // The nice thing about sequentially numbered identifiers is that we can also + // trivially look them up from our signal handler without making any system + // calls that might be async-signal-unsafe. + // In order to do so, we store all of our traps in a C-style trap_array_. + + TrapIds::const_iterator iter = trap_ids_.find(key); + if (iter != trap_ids_.end()) { + // We have seen this pair before. Return the same id that we assigned + // earlier. + return iter->second; + } + + // This is a new pair. Remember it and assign a new id. + if (trap_array_size_ >= SECCOMP_RET_DATA /* 0xFFFF */ || + trap_array_size_ >= std::numeric_limits<uint16_t>::max()) { + // In practice, this is pretty much impossible to trigger, as there + // are other kernel limitations that restrict overall BPF program sizes. + SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances"); + } + + // Our callers ensure that there are no other threads accessing trap_array_ + // concurrently (typically this is done by ensuring that we are single- + // threaded while the sandbox is being set up). But we nonetheless are + // modifying a live data structure that could be accessed any time a + // system call is made; as system calls could be triggering SIGSYS. + // So, we have to be extra careful that we update trap_array_ atomically. + // In particular, this means we shouldn't be using realloc() to resize it. + // Instead, we allocate a new array, copy the values, and then switch the + // pointer. We only really care about the pointer being updated atomically + // and the data that is pointed to being valid, as these are the only + // values accessed from the signal handler. It is OK if trap_array_size_ + // is inconsistent with the pointer, as it is monotonously increasing. + // Also, we only care about compiler barriers, as the signal handler is + // triggered synchronously from a system call. We don't have to protect + // against issues with the memory model or with completely asynchronous + // events. + if (trap_array_size_ >= trap_array_capacity_) { + trap_array_capacity_ += kCapacityIncrement; + TrapKey* old_trap_array = trap_array_; + TrapKey* new_trap_array = new TrapKey[trap_array_capacity_]; + std::copy_n(old_trap_array, trap_array_size_, new_trap_array); + + // Language specs are unclear on whether the compiler is allowed to move + // the "delete[]" above our preceding assignments and/or memory moves, + // iff the compiler believes that "delete[]" doesn't have any other + // global side-effects. + // We insert optimization barriers to prevent this from happening. + // The first barrier is probably not needed, but better be explicit in + // what we want to tell the compiler. + // The clang developer mailing list couldn't answer whether this is a + // legitimate worry; but they at least thought that the barrier is + // sufficient to prevent the (so far hypothetical) problem of re-ordering + // of instructions by the compiler. + // + // TODO(mdempsky): Try to clean this up using base/atomicops or C++11 + // atomics; see crbug.com/414363. + asm volatile("" : "=r"(new_trap_array) : "0"(new_trap_array) : "memory"); + trap_array_ = new_trap_array; + asm volatile("" : "=r"(trap_array_) : "0"(trap_array_) : "memory"); + + delete[] old_trap_array; + } + + uint16_t id = trap_array_size_ + 1; + trap_ids_[key] = id; + trap_array_[trap_array_size_] = key; + trap_array_size_++; + return id; +} + +bool Trap::SandboxDebuggingAllowedByUser() { + const char* debug_flag = getenv(kSandboxDebuggingEnv); + return debug_flag && *debug_flag; +} + +bool Trap::EnableUnsafeTraps() { + if (!has_unsafe_traps_) { + // Unsafe traps are a one-way fuse. Once enabled, they can never be turned + // off again. + // We only allow enabling unsafe traps, if the user explicitly set an + // appropriate environment variable. This prevents bugs that accidentally + // disable all sandboxing for all users. + if (SandboxDebuggingAllowedByUser()) { + // We only ever print this message once, when we enable unsafe traps the + // first time. + SANDBOX_INFO("WARNING! Disabling sandbox for debugging purposes"); + has_unsafe_traps_ = true; + } else { + SANDBOX_INFO( + "Cannot disable sandbox and use unsafe traps unless " + "CHROME_SANDBOX_DEBUGGING is turned on first"); + } + } + // Returns the, possibly updated, value of has_unsafe_traps_. + return has_unsafe_traps_; +} + +Trap* Trap::global_trap_; + +} // namespace sandbox diff --git a/sandbox/linux/seccomp-bpf/trap.h b/sandbox/linux/seccomp-bpf/trap.h new file mode 100644 index 0000000000..50ac3fd1c3 --- /dev/null +++ b/sandbox/linux/seccomp-bpf/trap.h @@ -0,0 +1,85 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SECCOMP_BPF_TRAP_H__ +#define SANDBOX_LINUX_SECCOMP_BPF_TRAP_H__ + +#include <stdint.h> + +#include <map> + +#include "base/macros.h" +#include "sandbox/linux/bpf_dsl/trap_registry.h" +#include "sandbox/linux/system_headers/linux_signal.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { + +// The Trap class allows a BPF filter program to branch out to user space by +// raising a SIGSYS signal. +// N.B.: This class does not perform any synchronization operations. If +// modifications are made to any of the traps, it is the caller's +// responsibility to ensure that this happens in a thread-safe fashion. +// Preferably, that means that no other threads should be running at that +// time. For the purposes of our sandbox, this assertion should always be +// true. Threads are incompatible with the seccomp sandbox anyway. +class SANDBOX_EXPORT Trap : public bpf_dsl::TrapRegistry { + public: + uint16_t Add(TrapFnc fnc, const void* aux, bool safe) override; + + bool EnableUnsafeTraps() override; + + // Registry returns the trap registry used by Trap's SIGSYS handler, + // creating it if necessary. + static bpf_dsl::TrapRegistry* Registry(); + + // SandboxDebuggingAllowedByUser returns whether the + // "CHROME_SANDBOX_DEBUGGING" environment variable is set. + static bool SandboxDebuggingAllowedByUser(); + + private: + struct TrapKey { + TrapKey() : fnc(NULL), aux(NULL), safe(false) {} + TrapKey(TrapFnc f, const void* a, bool s) : fnc(f), aux(a), safe(s) {} + TrapFnc fnc; + const void* aux; + bool safe; + bool operator<(const TrapKey&) const; + }; + typedef std::map<TrapKey, uint16_t> TrapIds; + + // Our constructor is private. A shared global instance is created + // automatically as needed. + Trap(); + + // The destructor is unimplemented as destroying this object would + // break subsequent system calls that trigger a SIGSYS. + ~Trap() = delete; + + static void SigSysAction(int nr, LinuxSigInfo* info, void* void_context); + + // Make sure that SigSys is not inlined in order to get slightly better crash + // dumps. + void SigSys(int nr, LinuxSigInfo* info, ucontext_t* ctx) + __attribute__((noinline)); + // We have a global singleton that handles all of our SIGSYS traps. This + // variable must never be deallocated after it has been set up initially, as + // there is no way to reset in-kernel BPF filters that generate SIGSYS + // events. + static Trap* global_trap_; + + TrapIds trap_ids_; // Maps from TrapKeys to numeric ids + TrapKey* trap_array_; // Array of TrapKeys indexed by ids + size_t trap_array_size_; // Currently used size of array + size_t trap_array_capacity_; // Currently allocated capacity of array + bool has_unsafe_traps_; // Whether unsafe traps have been enabled + + // Copying and assigning is unimplemented. It doesn't make sense for a + // singleton. + DISALLOW_COPY_AND_ASSIGN(Trap); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SECCOMP_BPF_TRAP_H__ diff --git a/sandbox/linux/seccomp-bpf/trap_unittest.cc b/sandbox/linux/seccomp-bpf/trap_unittest.cc new file mode 100644 index 0000000000..99f94bfb3a --- /dev/null +++ b/sandbox/linux/seccomp-bpf/trap_unittest.cc @@ -0,0 +1,28 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/seccomp-bpf/trap.h" + +#include <signal.h> + +#include "sandbox/linux/tests/unit_tests.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace sandbox { +namespace { + +SANDBOX_TEST_ALLOW_NOISE(Trap, SigSysAction) { + // This creates a global Trap instance, and registers the signal handler + // (Trap::SigSysAction). + Trap::Registry(); + + // Send SIGSYS to self. If signal handler (SigSysAction) is not registered, + // the process will be terminated with status code -SIGSYS. + // Note that, SigSysAction handler would output an error message + // "Unexpected SIGSYS received." so it is necessary to allow the noise. + raise(SIGSYS); +} + +} // namespace +} // namespace sandbox diff --git a/sandbox/linux/services/credentials.cc b/sandbox/linux/services/credentials.cc new file mode 100644 index 0000000000..c77fd9efde --- /dev/null +++ b/sandbox/linux/services/credentials.cc @@ -0,0 +1,299 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/credentials.h" + +#include <errno.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "base/bind.h" +#include "base/files/file_path.h" +#include "base/files/file_util.h" +#include "base/logging.h" +#include "base/posix/eintr_wrapper.h" +#include "base/process/launch.h" +#include "base/template_util.h" +#include "build/build_config.h" +#include "sandbox/linux/services/namespace_utils.h" +#include "sandbox/linux/services/proc_util.h" +#include "sandbox/linux/services/syscall_wrappers.h" +#include "sandbox/linux/services/thread_helpers.h" +#include "sandbox/linux/system_headers/capability.h" +#include "sandbox/linux/system_headers/linux_signal.h" +#include "third_party/valgrind/valgrind.h" + +namespace sandbox { + +namespace { + +bool IsRunningOnValgrind() { return RUNNING_ON_VALGRIND; } + +// Checks that the set of RES-uids and the set of RES-gids have +// one element each and return that element in |resuid| and |resgid| +// respectively. It's ok to pass NULL as one or both of the ids. +bool GetRESIds(uid_t* resuid, gid_t* resgid) { + uid_t ruid, euid, suid; + gid_t rgid, egid, sgid; + PCHECK(sys_getresuid(&ruid, &euid, &suid) == 0); + PCHECK(sys_getresgid(&rgid, &egid, &sgid) == 0); + const bool uids_are_equal = (ruid == euid) && (ruid == suid); + const bool gids_are_equal = (rgid == egid) && (rgid == sgid); + if (!uids_are_equal || !gids_are_equal) return false; + if (resuid) *resuid = euid; + if (resgid) *resgid = egid; + return true; +} + +const int kExitSuccess = 0; + +int ChrootToSelfFdinfo(void*) { + RAW_CHECK(sys_chroot("/proc/self/fdinfo/") == 0); + + // CWD is essentially an implicit file descriptor, so be careful to not + // leave it behind. + RAW_CHECK(chdir("/") == 0); + _exit(kExitSuccess); +} + +// chroot() to an empty dir that is "safe". To be safe, it must not contain +// any subdirectory (chroot-ing there would allow a chroot escape) and it must +// be impossible to create an empty directory there. +// We achieve this by doing the following: +// 1. We create a new process sharing file system information. +// 2. In the child, we chroot to /proc/self/fdinfo/ +// This is already "safe", since fdinfo/ does not contain another directory and +// one cannot create another directory there. +// 3. The process dies +// After (3) happens, the directory is not available anymore in /proc. +bool ChrootToSafeEmptyDir() { + // We need to chroot to a fdinfo that is unique to a process and have that + // process die. + // 1. We don't want to simply fork() because duplicating the page tables is + // slow with a big address space. + // 2. We do not use a regular thread (that would unshare CLONE_FILES) because + // when we are in a PID namespace, we cannot easily get a handle to the + // /proc/tid directory for the thread (since /proc may not be aware of the + // PID namespace). With a process, we can just use /proc/self. + pid_t pid = -1; + char stack_buf[PTHREAD_STACK_MIN]; +#if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \ + defined(ARCH_CPU_MIPS64_FAMILY) || defined(ARCH_CPU_MIPS_FAMILY) + // The stack grows downward. + void* stack = stack_buf + sizeof(stack_buf); +#else +#error "Unsupported architecture" +#endif + + pid = clone(ChrootToSelfFdinfo, stack, + CLONE_VM | CLONE_VFORK | CLONE_FS | LINUX_SIGCHLD, nullptr, + nullptr, nullptr, nullptr); + PCHECK(pid != -1); + + int status = -1; + PCHECK(HANDLE_EINTR(waitpid(pid, &status, 0)) == pid); + + return WIFEXITED(status) && WEXITSTATUS(status) == kExitSuccess; +} + +// CHECK() that an attempt to move to a new user namespace raised an expected +// errno. +void CheckCloneNewUserErrno(int error) { + // EPERM can happen if already in a chroot. EUSERS if too many nested + // namespaces are used. EINVAL for kernels that don't support the feature. + // Valgrind will ENOSYS unshare(). + PCHECK(error == EPERM || error == EUSERS || error == EINVAL || + error == ENOSYS); +} + +// Converts a Capability to the corresponding Linux CAP_XXX value. +int CapabilityToKernelValue(Credentials::Capability cap) { + switch (cap) { + case Credentials::Capability::SYS_CHROOT: + return CAP_SYS_CHROOT; + case Credentials::Capability::SYS_ADMIN: + return CAP_SYS_ADMIN; + } + + LOG(FATAL) << "Invalid Capability: " << static_cast<int>(cap); + return 0; +} + +} // namespace. + +// static +bool Credentials::DropAllCapabilities(int proc_fd) { + if (!SetCapabilities(proc_fd, std::vector<Capability>())) { + return false; + } + + CHECK(!HasAnyCapability()); + return true; +} + +// static +bool Credentials::DropAllCapabilities() { + base::ScopedFD proc_fd(ProcUtil::OpenProc()); + return Credentials::DropAllCapabilities(proc_fd.get()); +} + +// static +bool Credentials::DropAllCapabilitiesOnCurrentThread() { + return SetCapabilitiesOnCurrentThread(std::vector<Capability>()); +} + +// static +bool Credentials::SetCapabilitiesOnCurrentThread( + const std::vector<Capability>& caps) { + struct cap_hdr hdr = {}; + hdr.version = _LINUX_CAPABILITY_VERSION_3; + struct cap_data data[_LINUX_CAPABILITY_U32S_3] = {{}}; + + // Initially, cap has no capability flags set. Enable the effective and + // permitted flags only for the requested capabilities. + for (const Capability cap : caps) { + const int cap_num = CapabilityToKernelValue(cap); + const size_t index = CAP_TO_INDEX(cap_num); + const uint32_t mask = CAP_TO_MASK(cap_num); + data[index].effective |= mask; + data[index].permitted |= mask; + } + + return sys_capset(&hdr, data) == 0; +} + +// static +bool Credentials::SetCapabilities(int proc_fd, + const std::vector<Capability>& caps) { + DCHECK_LE(0, proc_fd); + +#if !defined(THREAD_SANITIZER) + // With TSAN, accept to break the security model as it is a testing + // configuration. + CHECK(ThreadHelpers::IsSingleThreaded(proc_fd)); +#endif + + return SetCapabilitiesOnCurrentThread(caps); +} + +bool Credentials::HasAnyCapability() { + struct cap_hdr hdr = {}; + hdr.version = _LINUX_CAPABILITY_VERSION_3; + struct cap_data data[_LINUX_CAPABILITY_U32S_3] = {{}}; + + PCHECK(sys_capget(&hdr, data) == 0); + + for (size_t i = 0; i < arraysize(data); ++i) { + if (data[i].effective || data[i].permitted || data[i].inheritable) { + return true; + } + } + + return false; +} + +bool Credentials::HasCapability(Capability cap) { + struct cap_hdr hdr = {}; + hdr.version = _LINUX_CAPABILITY_VERSION_3; + struct cap_data data[_LINUX_CAPABILITY_U32S_3] = {{}}; + + PCHECK(sys_capget(&hdr, data) == 0); + + const int cap_num = CapabilityToKernelValue(cap); + const size_t index = CAP_TO_INDEX(cap_num); + const uint32_t mask = CAP_TO_MASK(cap_num); + + return (data[index].effective | data[index].permitted | + data[index].inheritable) & + mask; +} + +// static +bool Credentials::CanCreateProcessInNewUserNS() { + // Valgrind will let clone(2) pass-through, but doesn't support unshare(), + // so always consider UserNS unsupported there. + if (IsRunningOnValgrind()) { + return false; + } + +#if defined(THREAD_SANITIZER) + // With TSAN, processes will always have threads running and can never + // enter a new user namespace with MoveToNewUserNS(). + return false; +#endif + + // This is roughly a fork(). + const pid_t pid = sys_clone(CLONE_NEWUSER | SIGCHLD, 0, 0, 0, 0); + + if (pid == -1) { + CheckCloneNewUserErrno(errno); + return false; + } + + // The parent process could have had threads. In the child, these threads + // have disappeared. Make sure to not do anything in the child, as this is a + // fragile execution environment. + if (pid == 0) { + _exit(kExitSuccess); + } + + // Always reap the child. + int status = -1; + PCHECK(HANDLE_EINTR(waitpid(pid, &status, 0)) == pid); + CHECK(WIFEXITED(status)); + CHECK_EQ(kExitSuccess, WEXITSTATUS(status)); + + // clone(2) succeeded, we can use CLONE_NEWUSER. + return true; +} + +bool Credentials::MoveToNewUserNS() { + uid_t uid; + gid_t gid; + if (!GetRESIds(&uid, &gid)) { + // If all the uids (or gids) are not equal to each other, the security + // model will most likely confuse the caller, abort. + DVLOG(1) << "uids or gids differ!"; + return false; + } + int ret = sys_unshare(CLONE_NEWUSER); + if (ret) { + const int unshare_errno = errno; + VLOG(1) << "Looks like unprivileged CLONE_NEWUSER may not be available " + << "on this kernel."; + CheckCloneNewUserErrno(unshare_errno); + return false; + } + + if (NamespaceUtils::KernelSupportsDenySetgroups()) { + PCHECK(NamespaceUtils::DenySetgroups()); + } + + // The current {r,e,s}{u,g}id is now an overflow id (c.f. + // /proc/sys/kernel/overflowuid). Setup the uid and gid maps. + DCHECK(GetRESIds(NULL, NULL)); + const char kGidMapFile[] = "/proc/self/gid_map"; + const char kUidMapFile[] = "/proc/self/uid_map"; + PCHECK(NamespaceUtils::WriteToIdMapFile(kGidMapFile, gid)); + PCHECK(NamespaceUtils::WriteToIdMapFile(kUidMapFile, uid)); + DCHECK(GetRESIds(NULL, NULL)); + return true; +} + +bool Credentials::DropFileSystemAccess(int proc_fd) { + CHECK_LE(0, proc_fd); + + CHECK(ChrootToSafeEmptyDir()); + CHECK(!base::DirectoryExists(base::FilePath("/proc"))); + CHECK(!ProcUtil::HasOpenDirectory(proc_fd)); + // We never let this function fail. + return true; +} + +} // namespace sandbox. diff --git a/sandbox/linux/services/credentials.h b/sandbox/linux/services/credentials.h new file mode 100644 index 0000000000..0001dc7328 --- /dev/null +++ b/sandbox/linux/services/credentials.h @@ -0,0 +1,104 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SERVICES_CREDENTIALS_H_ +#define SANDBOX_LINUX_SERVICES_CREDENTIALS_H_ + +#include "build/build_config.h" +// Link errors are tedious to track, raise a compile-time error instead. +#if defined(OS_ANDROID) +#error "Android is not supported." +#endif // defined(OS_ANDROID). + +#include <string> +#include <vector> + +#include "base/compiler_specific.h" +#include "base/macros.h" +#include "base/memory/scoped_ptr.h" +#include "sandbox/linux/system_headers/capability.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { + +// This class should be used to manipulate the current process' credentials. +// It is currently a stub used to manipulate POSIX.1e capabilities as +// implemented by the Linux kernel. +class SANDBOX_EXPORT Credentials { + public: + // For brevity, we only expose enums for the subset of capabilities we use. + // This can be expanded as the need arises. + enum class Capability { + SYS_CHROOT, + SYS_ADMIN, + }; + + // Drop all capabilities in the effective, inheritable and permitted sets for + // the current thread. For security reasons, since capabilities are + // per-thread, the caller is responsible for ensuring it is single-threaded + // when calling this API. + // |proc_fd| must be a file descriptor to /proc/ and remains owned by + // the caller. + static bool DropAllCapabilities(int proc_fd) WARN_UNUSED_RESULT; + // A similar API which assumes that it can open /proc/self/ by itself. + static bool DropAllCapabilities() WARN_UNUSED_RESULT; + // Sets the effective and permitted capability sets for the current thread to + // the list of capabiltiies in |caps|. All other capability flags are cleared. + static bool SetCapabilities(int proc_fd, + const std::vector<Capability>& caps) + WARN_UNUSED_RESULT; + + // Versions of the above functions which do not check that the process is + // single-threaded. After calling these functions, capabilities of other + // threads will not be changed. This is dangerous, do not use unless you nkow + // what you are doing. + static bool DropAllCapabilitiesOnCurrentThread() WARN_UNUSED_RESULT; + static bool SetCapabilitiesOnCurrentThread( + const std::vector<Capability>& caps) WARN_UNUSED_RESULT; + + // Returns true if the current thread has either the effective, permitted, or + // inheritable flag set for the given capability. + static bool HasCapability(Capability cap); + + // Return true iff there is any capability in any of the capabilities sets + // of the current thread. + static bool HasAnyCapability(); + + // Returns whether the kernel supports CLONE_NEWUSER and whether it would be + // possible to immediately move to a new user namespace. There is no point + // in using this method right before calling MoveToNewUserNS(), simply call + // MoveToNewUserNS() immediately. This method is only useful to test the + // ability to move to a user namespace ahead of time. + static bool CanCreateProcessInNewUserNS(); + + // Move the current process to a new "user namespace" as supported by Linux + // 3.8+ (CLONE_NEWUSER). + // The uid map will be set-up so that the perceived uid and gid will not + // change. + // If this call succeeds, the current process will be granted a full set of + // capabilities in the new namespace. + // This will fail if the process is not mono-threaded. + static bool MoveToNewUserNS() WARN_UNUSED_RESULT; + + // Remove the ability of the process to access the file system. File + // descriptors which are already open prior to calling this API remain + // available. + // The implementation currently uses chroot(2) and requires CAP_SYS_CHROOT. + // CAP_SYS_CHROOT can be acquired by using the MoveToNewUserNS() API. + // |proc_fd| must be a file descriptor to /proc/ and must be the only open + // directory file descriptor of the process. + // + // CRITICAL: + // - the caller must close |proc_fd| eventually or access to the file + // system can be recovered. + // - DropAllCapabilities() must be called to prevent escapes. + static bool DropFileSystemAccess(int proc_fd) WARN_UNUSED_RESULT; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Credentials); +}; + +} // namespace sandbox. + +#endif // SANDBOX_LINUX_SERVICES_CREDENTIALS_H_ diff --git a/sandbox/linux/services/credentials_unittest.cc b/sandbox/linux/services/credentials_unittest.cc new file mode 100644 index 0000000000..6b93c86c3e --- /dev/null +++ b/sandbox/linux/services/credentials_unittest.cc @@ -0,0 +1,242 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/credentials.h" + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <sys/capability.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <vector> + +#include "base/files/file_path.h" +#include "base/files/file_util.h" +#include "base/files/scoped_file.h" +#include "base/logging.h" +#include "base/memory/scoped_ptr.h" +#include "sandbox/linux/services/proc_util.h" +#include "sandbox/linux/services/syscall_wrappers.h" +#include "sandbox/linux/system_headers/capability.h" +#include "sandbox/linux/tests/unit_tests.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace sandbox { + +namespace { + +struct CapFreeDeleter { + inline void operator()(cap_t cap) const { + int ret = cap_free(cap); + CHECK_EQ(0, ret); + } +}; + +// Wrapper to manage libcap2's cap_t type. +typedef scoped_ptr<typeof(*((cap_t)0)), CapFreeDeleter> ScopedCap; + +bool WorkingDirectoryIsRoot() { + char current_dir[PATH_MAX]; + char* cwd = getcwd(current_dir, sizeof(current_dir)); + PCHECK(cwd); + if (strcmp("/", cwd)) return false; + + // The current directory is the root. Add a few paranoid checks. + struct stat current; + CHECK_EQ(0, stat(".", ¤t)); + struct stat parrent; + CHECK_EQ(0, stat("..", &parrent)); + CHECK_EQ(current.st_dev, parrent.st_dev); + CHECK_EQ(current.st_ino, parrent.st_ino); + CHECK_EQ(current.st_mode, parrent.st_mode); + CHECK_EQ(current.st_uid, parrent.st_uid); + CHECK_EQ(current.st_gid, parrent.st_gid); + return true; +} + +SANDBOX_TEST(Credentials, DropAllCaps) { + CHECK(Credentials::DropAllCapabilities()); + CHECK(!Credentials::HasAnyCapability()); +} + +SANDBOX_TEST(Credentials, MoveToNewUserNS) { + CHECK(Credentials::DropAllCapabilities()); + bool moved_to_new_ns = Credentials::MoveToNewUserNS(); + fprintf(stdout, + "Unprivileged CLONE_NEWUSER supported: %s\n", + moved_to_new_ns ? "true." : "false."); + fflush(stdout); + if (!moved_to_new_ns) { + fprintf(stdout, "This kernel does not support unprivileged namespaces. " + "USERNS tests will succeed without running.\n"); + fflush(stdout); + return; + } + CHECK(Credentials::HasAnyCapability()); + CHECK(Credentials::DropAllCapabilities()); + CHECK(!Credentials::HasAnyCapability()); +} + +SANDBOX_TEST(Credentials, CanCreateProcessInNewUserNS) { + CHECK(Credentials::DropAllCapabilities()); + bool user_ns_supported = Credentials::CanCreateProcessInNewUserNS(); + bool moved_to_new_ns = Credentials::MoveToNewUserNS(); + CHECK_EQ(user_ns_supported, moved_to_new_ns); +} + +SANDBOX_TEST(Credentials, UidIsPreserved) { + CHECK(Credentials::DropAllCapabilities()); + uid_t old_ruid, old_euid, old_suid; + gid_t old_rgid, old_egid, old_sgid; + PCHECK(0 == getresuid(&old_ruid, &old_euid, &old_suid)); + PCHECK(0 == getresgid(&old_rgid, &old_egid, &old_sgid)); + // Probably missing kernel support. + if (!Credentials::MoveToNewUserNS()) return; + uid_t new_ruid, new_euid, new_suid; + PCHECK(0 == getresuid(&new_ruid, &new_euid, &new_suid)); + CHECK(old_ruid == new_ruid); + CHECK(old_euid == new_euid); + CHECK(old_suid == new_suid); + + gid_t new_rgid, new_egid, new_sgid; + PCHECK(0 == getresgid(&new_rgid, &new_egid, &new_sgid)); + CHECK(old_rgid == new_rgid); + CHECK(old_egid == new_egid); + CHECK(old_sgid == new_sgid); +} + +bool NewUserNSCycle() { + if (!Credentials::MoveToNewUserNS() || + !Credentials::HasAnyCapability() || + !Credentials::DropAllCapabilities() || + Credentials::HasAnyCapability()) { + return false; + } + return true; +} + +SANDBOX_TEST(Credentials, NestedUserNS) { + CHECK(Credentials::DropAllCapabilities()); + // Probably missing kernel support. + if (!Credentials::MoveToNewUserNS()) return; + CHECK(Credentials::DropAllCapabilities()); + // As of 3.12, the kernel has a limit of 32. See create_user_ns(). + const int kNestLevel = 10; + for (int i = 0; i < kNestLevel; ++i) { + CHECK(NewUserNSCycle()) << "Creating new user NS failed at iteration " + << i << "."; + } +} + +// Test the WorkingDirectoryIsRoot() helper. +SANDBOX_TEST(Credentials, CanDetectRoot) { + PCHECK(0 == chdir("/proc/")); + CHECK(!WorkingDirectoryIsRoot()); + PCHECK(0 == chdir("/")); + CHECK(WorkingDirectoryIsRoot()); +} + +// Disabled on ASAN because of crbug.com/451603. +SANDBOX_TEST(Credentials, DISABLE_ON_ASAN(DropFileSystemAccessIsSafe)) { + CHECK(Credentials::DropAllCapabilities()); + // Probably missing kernel support. + if (!Credentials::MoveToNewUserNS()) return; + CHECK(Credentials::DropFileSystemAccess(ProcUtil::OpenProc().get())); + CHECK(!base::DirectoryExists(base::FilePath("/proc"))); + CHECK(WorkingDirectoryIsRoot()); + CHECK(base::IsDirectoryEmpty(base::FilePath("/"))); + // We want the chroot to never have a subdirectory. A subdirectory + // could allow a chroot escape. + CHECK_NE(0, mkdir("/test", 0700)); +} + +// Check that after dropping filesystem access and dropping privileges +// it is not possible to regain capabilities. +SANDBOX_TEST(Credentials, DISABLE_ON_ASAN(CannotRegainPrivileges)) { + base::ScopedFD proc_fd(ProcUtil::OpenProc()); + CHECK(Credentials::DropAllCapabilities(proc_fd.get())); + // Probably missing kernel support. + if (!Credentials::MoveToNewUserNS()) return; + CHECK(Credentials::DropFileSystemAccess(proc_fd.get())); + CHECK(Credentials::DropAllCapabilities(proc_fd.get())); + + // The kernel should now prevent us from regaining capabilities because we + // are in a chroot. + CHECK(!Credentials::CanCreateProcessInNewUserNS()); + CHECK(!Credentials::MoveToNewUserNS()); +} + +SANDBOX_TEST(Credentials, SetCapabilities) { + // Probably missing kernel support. + if (!Credentials::MoveToNewUserNS()) + return; + + base::ScopedFD proc_fd(ProcUtil::OpenProc()); + + CHECK(Credentials::HasCapability(Credentials::Capability::SYS_ADMIN)); + CHECK(Credentials::HasCapability(Credentials::Capability::SYS_CHROOT)); + + std::vector<Credentials::Capability> caps; + caps.push_back(Credentials::Capability::SYS_CHROOT); + CHECK(Credentials::SetCapabilities(proc_fd.get(), caps)); + + CHECK(!Credentials::HasCapability(Credentials::Capability::SYS_ADMIN)); + CHECK(Credentials::HasCapability(Credentials::Capability::SYS_CHROOT)); + + const std::vector<Credentials::Capability> no_caps; + CHECK(Credentials::SetCapabilities(proc_fd.get(), no_caps)); + CHECK(!Credentials::HasAnyCapability()); +} + +SANDBOX_TEST(Credentials, SetCapabilitiesAndChroot) { + // Probably missing kernel support. + if (!Credentials::MoveToNewUserNS()) + return; + + base::ScopedFD proc_fd(ProcUtil::OpenProc()); + + CHECK(Credentials::HasCapability(Credentials::Capability::SYS_CHROOT)); + PCHECK(chroot("/") == 0); + + std::vector<Credentials::Capability> caps; + caps.push_back(Credentials::Capability::SYS_CHROOT); + CHECK(Credentials::SetCapabilities(proc_fd.get(), caps)); + PCHECK(chroot("/") == 0); + + CHECK(Credentials::DropAllCapabilities()); + PCHECK(chroot("/") == -1 && errno == EPERM); +} + +SANDBOX_TEST(Credentials, SetCapabilitiesMatchesLibCap2) { + // Probably missing kernel support. + if (!Credentials::MoveToNewUserNS()) + return; + + base::ScopedFD proc_fd(ProcUtil::OpenProc()); + + std::vector<Credentials::Capability> caps; + caps.push_back(Credentials::Capability::SYS_CHROOT); + CHECK(Credentials::SetCapabilities(proc_fd.get(), caps)); + + ScopedCap actual_cap(cap_get_proc()); + PCHECK(actual_cap != nullptr); + + ScopedCap expected_cap(cap_init()); + PCHECK(expected_cap != nullptr); + + const cap_value_t allowed_cap = CAP_SYS_CHROOT; + for (const cap_flag_t flag : {CAP_EFFECTIVE, CAP_PERMITTED}) { + PCHECK(cap_set_flag(expected_cap.get(), flag, 1, &allowed_cap, CAP_SET) == + 0); + } + + CHECK_EQ(0, cap_compare(expected_cap.get(), actual_cap.get())); +} + +} // namespace. + +} // namespace sandbox. diff --git a/sandbox/linux/services/init_process_reaper.cc b/sandbox/linux/services/init_process_reaper.cc new file mode 100644 index 0000000000..2e0b90b7b5 --- /dev/null +++ b/sandbox/linux/services/init_process_reaper.cc @@ -0,0 +1,101 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/init_process_reaper.h" + +#include <signal.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "base/callback.h" +#include "base/logging.h" +#include "base/posix/eintr_wrapper.h" + +namespace sandbox { + +namespace { + +void DoNothingSignalHandler(int signal) {} + +} // namespace + +bool CreateInitProcessReaper(base::Closure* post_fork_parent_callback) { + int sync_fds[2]; + // We want to use send, so we can't use a pipe + if (socketpair(AF_UNIX, SOCK_STREAM, 0, sync_fds)) { + PLOG(ERROR) << "Failed to create socketpair"; + return false; + } + pid_t child_pid = fork(); + if (child_pid == -1) { + int close_ret; + close_ret = IGNORE_EINTR(close(sync_fds[0])); + DPCHECK(!close_ret); + close_ret = IGNORE_EINTR(close(sync_fds[1])); + DPCHECK(!close_ret); + return false; + } + if (child_pid) { + // In the parent, assuming the role of an init process. + // The disposition for SIGCHLD cannot be SIG_IGN or wait() will only return + // once all of our childs are dead. Since we're init we need to reap childs + // as they come. + struct sigaction action; + memset(&action, 0, sizeof(action)); + action.sa_handler = &DoNothingSignalHandler; + CHECK(sigaction(SIGCHLD, &action, NULL) == 0); + + int close_ret; + close_ret = IGNORE_EINTR(close(sync_fds[0])); + DPCHECK(!close_ret); + close_ret = shutdown(sync_fds[1], SHUT_RD); + DPCHECK(!close_ret); + if (post_fork_parent_callback) + post_fork_parent_callback->Run(); + // Tell the child to continue + CHECK(HANDLE_EINTR(send(sync_fds[1], "C", 1, MSG_NOSIGNAL)) == 1); + close_ret = IGNORE_EINTR(close(sync_fds[1])); + DPCHECK(!close_ret); + + for (;;) { + // Loop until we have reaped our one natural child + siginfo_t reaped_child_info; + int wait_ret = + HANDLE_EINTR(waitid(P_ALL, 0, &reaped_child_info, WEXITED)); + if (wait_ret) + _exit(1); + if (reaped_child_info.si_pid == child_pid) { + int exit_code = 0; + // We're done waiting + if (reaped_child_info.si_code == CLD_EXITED) { + exit_code = reaped_child_info.si_status; + } + // Exit with the same exit code as our parent. Exit with 0 if we got + // signaled. + _exit(exit_code); + } + } + } else { + // The child needs to wait for the parent to run the callback to avoid a + // race condition. + int close_ret; + close_ret = IGNORE_EINTR(close(sync_fds[1])); + DPCHECK(!close_ret); + close_ret = shutdown(sync_fds[0], SHUT_WR); + DPCHECK(!close_ret); + char should_continue; + int read_ret = HANDLE_EINTR(read(sync_fds[0], &should_continue, 1)); + close_ret = IGNORE_EINTR(close(sync_fds[0])); + DPCHECK(!close_ret); + if (read_ret == 1) + return true; + else + return false; + } +} + +} // namespace sandbox. diff --git a/sandbox/linux/services/init_process_reaper.h b/sandbox/linux/services/init_process_reaper.h new file mode 100644 index 0000000000..840f6fcda7 --- /dev/null +++ b/sandbox/linux/services/init_process_reaper.h @@ -0,0 +1,25 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SERVICES_INIT_PROCESS_REAPER_H_ +#define SANDBOX_LINUX_SERVICES_INIT_PROCESS_REAPER_H_ + +#include "base/callback_forward.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { + +// The current process will fork(). The parent will become a process reaper +// like init(1). The child will continue normally (after this function +// returns). +// If not NULL, |post_fork_parent_callback| will run in the parent almost +// immediately after fork(). +// Since this function calls fork(), it's very important that the caller has +// only one thread running. +SANDBOX_EXPORT bool CreateInitProcessReaper( + base::Closure* post_fork_parent_callback); + +} // namespace sandbox. + +#endif // SANDBOX_LINUX_SERVICES_INIT_PROCESS_REAPER_H_ diff --git a/sandbox/linux/services/namespace_sandbox.cc b/sandbox/linux/services/namespace_sandbox.cc new file mode 100644 index 0000000000..23796446f3 --- /dev/null +++ b/sandbox/linux/services/namespace_sandbox.cc @@ -0,0 +1,208 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/namespace_sandbox.h" + +#include <sched.h> +#include <signal.h> +#include <stdlib.h> +#include <sys/types.h> +#include <unistd.h> + +#include <string> +#include <utility> +#include <vector> + +#include "base/command_line.h" +#include "base/environment.h" +#include "base/files/scoped_file.h" +#include "base/logging.h" +#include "base/macros.h" +#include "base/posix/eintr_wrapper.h" +#include "base/process/launch.h" +#include "base/process/process.h" +#include "sandbox/linux/services/credentials.h" +#include "sandbox/linux/services/namespace_utils.h" + +namespace sandbox { + +namespace { + +const char kSandboxUSERNSEnvironmentVarName[] = "SBX_USER_NS"; +const char kSandboxPIDNSEnvironmentVarName[] = "SBX_PID_NS"; +const char kSandboxNETNSEnvironmentVarName[] = "SBX_NET_NS"; + +#if !defined(OS_NACL_NONSFI) +class WriteUidGidMapDelegate : public base::LaunchOptions::PreExecDelegate { + public: + WriteUidGidMapDelegate() + : uid_(getuid()), + gid_(getgid()), + supports_deny_setgroups_( + NamespaceUtils::KernelSupportsDenySetgroups()) {} + + ~WriteUidGidMapDelegate() override {} + + void RunAsyncSafe() override { + if (supports_deny_setgroups_) { + RAW_CHECK(NamespaceUtils::DenySetgroups()); + } + RAW_CHECK(NamespaceUtils::WriteToIdMapFile("/proc/self/uid_map", uid_)); + RAW_CHECK(NamespaceUtils::WriteToIdMapFile("/proc/self/gid_map", gid_)); + } + + private: + const uid_t uid_; + const gid_t gid_; + const bool supports_deny_setgroups_; + DISALLOW_COPY_AND_ASSIGN(WriteUidGidMapDelegate); +}; + +void SetEnvironForNamespaceType(base::EnvironmentMap* environ, + base::NativeEnvironmentString env_var, + bool value) { + // An empty string causes the env var to be unset in the child process. + (*environ)[env_var] = value ? "1" : ""; +} + +// Linux supports up to 64 signals. This should be updated if that ever changes. +int g_signal_exit_codes[64]; + +void TerminationSignalHandler(int sig) { + // Return a special exit code so that the process is detected as terminated by + // a signal. + const size_t sig_idx = static_cast<size_t>(sig); + if (sig_idx < arraysize(g_signal_exit_codes)) { + _exit(g_signal_exit_codes[sig_idx]); + } + + _exit(NamespaceSandbox::kDefaultExitCode); +} +#endif // !defined(OS_NACL_NONSFI) + +} // namespace + +#if !defined(OS_NACL_NONSFI) +// static +base::Process NamespaceSandbox::LaunchProcess( + const base::CommandLine& cmdline, + const base::LaunchOptions& options) { + return LaunchProcess(cmdline.argv(), options); +} + +// static +base::Process NamespaceSandbox::LaunchProcess( + const std::vector<std::string>& argv, + const base::LaunchOptions& options) { + int clone_flags = 0; + int ns_types[] = {CLONE_NEWUSER, CLONE_NEWPID, CLONE_NEWNET}; + for (const int ns_type : ns_types) { + if (NamespaceUtils::KernelSupportsUnprivilegedNamespace(ns_type)) { + clone_flags |= ns_type; + } + } + CHECK(clone_flags & CLONE_NEWUSER); + + // These fields may not be set by the caller. + CHECK(options.pre_exec_delegate == nullptr); + CHECK_EQ(0, options.clone_flags); + + WriteUidGidMapDelegate write_uid_gid_map_delegate; + + base::LaunchOptions launch_options = options; + launch_options.pre_exec_delegate = &write_uid_gid_map_delegate; + launch_options.clone_flags = clone_flags; + + const std::pair<int, const char*> clone_flag_environ[] = { + std::make_pair(CLONE_NEWUSER, kSandboxUSERNSEnvironmentVarName), + std::make_pair(CLONE_NEWPID, kSandboxPIDNSEnvironmentVarName), + std::make_pair(CLONE_NEWNET, kSandboxNETNSEnvironmentVarName), + }; + + base::EnvironmentMap* environ = &launch_options.environ; + for (const auto& entry : clone_flag_environ) { + const int flag = entry.first; + const char* environ_name = entry.second; + SetEnvironForNamespaceType(environ, environ_name, clone_flags & flag); + } + + return base::LaunchProcess(argv, launch_options); +} + +// static +pid_t NamespaceSandbox::ForkInNewPidNamespace(bool drop_capabilities_in_child) { + const pid_t pid = + base::ForkWithFlags(CLONE_NEWPID | SIGCHLD, nullptr, nullptr); + if (pid < 0) { + return pid; + } + + if (pid == 0) { + DCHECK_EQ(1, getpid()); + if (drop_capabilities_in_child) { + // Since we just forked, we are single-threaded, so this should be safe. + CHECK(Credentials::DropAllCapabilitiesOnCurrentThread()); + } + return 0; + } + + return pid; +} + +// static +void NamespaceSandbox::InstallDefaultTerminationSignalHandlers() { + static const int kDefaultTermSignals[] = { + SIGHUP, SIGINT, SIGABRT, SIGQUIT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2, + }; + + for (const int sig : kDefaultTermSignals) { + InstallTerminationSignalHandler(sig, kDefaultExitCode); + } +} + +// static +bool NamespaceSandbox::InstallTerminationSignalHandler( + int sig, + int exit_code) { + struct sigaction old_action; + PCHECK(sigaction(sig, nullptr, &old_action) == 0); + + if (old_action.sa_flags & SA_SIGINFO && + old_action.sa_sigaction != nullptr) { + return false; + } else if (old_action.sa_handler != SIG_DFL) { + return false; + } + + const size_t sig_idx = static_cast<size_t>(sig); + CHECK_LT(sig_idx, arraysize(g_signal_exit_codes)); + + DCHECK_GE(exit_code, 0); + DCHECK_LT(exit_code, 256); + + g_signal_exit_codes[sig_idx] = exit_code; + + struct sigaction action = {}; + action.sa_handler = &TerminationSignalHandler; + PCHECK(sigaction(sig, &action, nullptr) == 0); + return true; +} +#endif // !defined(OS_NACL_NONSFI) + +// static +bool NamespaceSandbox::InNewUserNamespace() { + return getenv(kSandboxUSERNSEnvironmentVarName) != nullptr; +} + +// static +bool NamespaceSandbox::InNewPidNamespace() { + return getenv(kSandboxPIDNSEnvironmentVarName) != nullptr; +} + +// static +bool NamespaceSandbox::InNewNetNamespace() { + return getenv(kSandboxNETNSEnvironmentVarName) != nullptr; +} + +} // namespace sandbox diff --git a/sandbox/linux/services/namespace_sandbox.h b/sandbox/linux/services/namespace_sandbox.h new file mode 100644 index 0000000000..80097fb16a --- /dev/null +++ b/sandbox/linux/services/namespace_sandbox.h @@ -0,0 +1,101 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SERVICES_NAMESPACE_SANDBOX_H_ +#define SANDBOX_LINUX_SERVICES_NAMESPACE_SANDBOX_H_ + +#include <sys/types.h> + +#include <string> +#include <vector> + +#include "base/command_line.h" +#include "base/macros.h" +#include "base/process/launch.h" +#include "base/process/process.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { + +// Helper class for starting a process inside a new user, PID, and network +// namespace. Before using a namespace sandbox, check for namespaces support +// using Credentials::CanCreateProcessInNewUserNS. +// +// A typical use for "A" launching a sandboxed process "B" would be: +// 1. A sets up a command line and launch options for process B. +// 2. A launches B with LaunchProcess. +// 3. B should be prepared to assume the role of init(1). In particular, apart +// from SIGKILL and SIGSTOP, B cannot receive any signal for which it does +// not have an explicit signal handler registered. +// If B dies, all the processes in the namespace will die. +// B can fork() and the parent can assume the role of init(1), by using +// CreateInitProcessReaper(). +// 4. B chroots using Credentials::MoveToNewUserNS() and +// Credentials::DropFileSystemAccess() +// 5. B drops capabilities gained by entering the new user namespace with +// Credentials::DropAllCapabilities(). +class SANDBOX_EXPORT NamespaceSandbox { + public: +#if !defined(OS_NACL_NONSFI) + static const int kDefaultExitCode = 1; + + // Launch a new process inside its own user/PID/network namespaces (depending + // on kernel support). Requires at a minimum that user namespaces are + // supported (use Credentials::CanCreateProcessInNewUserNS to check this). + // + // pre_exec_delegate and clone_flags fields of LaunchOptions should be nullptr + // and 0, respectively, since this function makes a copy of options and + // overrides them. + static base::Process LaunchProcess(const base::CommandLine& cmdline, + const base::LaunchOptions& options); + static base::Process LaunchProcess(const std::vector<std::string>& argv, + const base::LaunchOptions& options); + + // Forks a process in its own PID namespace. The child process is the init + // process inside of the PID namespace, so if the child needs to fork further, + // it should call CreateInitProcessReaper, which turns the init process into a + // reaper process. + // + // Otherwise, the child should setup handlers for signals which should + // terminate the process using InstallDefaultTerminationSignalHandlers or + // InstallTerminationSignalHandler. This works around the fact that init + // processes ignore such signals unless they have an explicit handler set. + // + // This function requries CAP_SYS_ADMIN. If |drop_capabilities_in_child| is + // true, then capabilities are dropped in the child. + static pid_t ForkInNewPidNamespace(bool drop_capabilities_in_child); + + // Installs a signal handler for: + // + // SIGHUP, SIGINT, SIGABRT, SIGQUIT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2 + // + // that exits with kDefaultExitCode. These are signals whose default action is + // to terminate the program (apart from SIGILL, SIGFPE, and SIGSEGV, which + // will still terminate the process if e.g. an illegal instruction is + // encountered, etc.). + // + // If any of these already had a signal handler installed, this function will + // not override them. + static void InstallDefaultTerminationSignalHandlers(); + + // Installs a signal handler for |sig| which exits with |exit_code|. If a + // signal handler was already present for |sig|, does nothing and returns + // false. + static bool InstallTerminationSignalHandler(int sig, int exit_code); +#endif // !defined(OS_NACL_NONSFI) + + // Returns whether the namespace sandbox created a new user, PID, and network + // namespace. In particular, InNewUserNamespace should return true iff the + // process was started via this class. + static bool InNewUserNamespace(); + static bool InNewPidNamespace(); + static bool InNewNetNamespace(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(NamespaceSandbox); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SERVICES_NAMESPACE_SANDBOX_H_ diff --git a/sandbox/linux/services/namespace_sandbox_unittest.cc b/sandbox/linux/services/namespace_sandbox_unittest.cc new file mode 100644 index 0000000000..547ef6728c --- /dev/null +++ b/sandbox/linux/services/namespace_sandbox_unittest.cc @@ -0,0 +1,217 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/namespace_sandbox.h" + +#include <signal.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <string> +#include <utility> + +#include "base/command_line.h" +#include "base/files/file_enumerator.h" +#include "base/files/file_path.h" +#include "base/logging.h" +#include "base/memory/scoped_ptr.h" +#include "base/process/launch.h" +#include "base/process/process.h" +#include "base/test/multiprocess_test.h" +#include "sandbox/linux/services/credentials.h" +#include "sandbox/linux/services/namespace_utils.h" +#include "sandbox/linux/services/proc_util.h" +#include "sandbox/linux/tests/unit_tests.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/multiprocess_func_list.h" + +namespace sandbox { + +namespace { + +bool RootDirectoryIsEmpty() { + base::FilePath root("/"); + int file_type = + base::FileEnumerator::DIRECTORIES | base::FileEnumerator::FILES; + base::FileEnumerator enumerator_before(root, false, file_type); + return enumerator_before.Next().empty(); +} + +class NamespaceSandboxTest : public base::MultiProcessTest { + public: + void TestProc(const std::string& procname) { + if (!Credentials::CanCreateProcessInNewUserNS()) { + return; + } + + base::FileHandleMappingVector fds_to_remap = { + std::make_pair(STDOUT_FILENO, STDOUT_FILENO), + std::make_pair(STDERR_FILENO, STDERR_FILENO), + }; + base::LaunchOptions launch_options; + launch_options.fds_to_remap = &fds_to_remap; + + base::Process process = + NamespaceSandbox::LaunchProcess(MakeCmdLine(procname), launch_options); + ASSERT_TRUE(process.IsValid()); + + const int kDummyExitCode = 42; + int exit_code = kDummyExitCode; + EXPECT_TRUE(process.WaitForExit(&exit_code)); + EXPECT_EQ(0, exit_code); + } +}; + +MULTIPROCESS_TEST_MAIN(SimpleChildProcess) { + scoped_ptr<base::Environment> env(base::Environment::Create()); + bool in_user_ns = NamespaceSandbox::InNewUserNamespace(); + bool in_pid_ns = NamespaceSandbox::InNewPidNamespace(); + bool in_net_ns = NamespaceSandbox::InNewNetNamespace(); + CHECK(in_user_ns); + CHECK_EQ(in_pid_ns, + NamespaceUtils::KernelSupportsUnprivilegedNamespace(CLONE_NEWPID)); + CHECK_EQ(in_net_ns, + NamespaceUtils::KernelSupportsUnprivilegedNamespace(CLONE_NEWNET)); + if (in_pid_ns) { + CHECK_EQ(1, getpid()); + } + return 0; +} + +TEST_F(NamespaceSandboxTest, BasicUsage) { + TestProc("SimpleChildProcess"); +} + +MULTIPROCESS_TEST_MAIN(ChrootMe) { + CHECK(!RootDirectoryIsEmpty()); + CHECK(sandbox::Credentials::MoveToNewUserNS()); + CHECK(sandbox::Credentials::DropFileSystemAccess(ProcUtil::OpenProc().get())); + CHECK(RootDirectoryIsEmpty()); + return 0; +} + +// Temporarily disabled on ASAN due to crbug.com/451603. +TEST_F(NamespaceSandboxTest, DISABLE_ON_ASAN(ChrootAndDropCapabilities)) { + TestProc("ChrootMe"); +} + +MULTIPROCESS_TEST_MAIN(NestedNamespaceSandbox) { + base::FileHandleMappingVector fds_to_remap = { + std::make_pair(STDOUT_FILENO, STDOUT_FILENO), + std::make_pair(STDERR_FILENO, STDERR_FILENO), + }; + base::LaunchOptions launch_options; + launch_options.fds_to_remap = &fds_to_remap; + base::Process process = NamespaceSandbox::LaunchProcess( + base::CommandLine(base::FilePath("/bin/true")), launch_options); + CHECK(process.IsValid()); + + const int kDummyExitCode = 42; + int exit_code = kDummyExitCode; + CHECK(process.WaitForExit(&exit_code)); + CHECK_EQ(0, exit_code); + return 0; +} + +TEST_F(NamespaceSandboxTest, NestedNamespaceSandbox) { + TestProc("NestedNamespaceSandbox"); +} + +const int kNormalExitCode = 0; +const int kSignalTerminationExitCode = 255; + +// Ensure that CHECK(false) is distinguishable from _exit(kNormalExitCode). +// Allowing noise since CHECK(false) will write a stack trace to stderr. +SANDBOX_TEST_ALLOW_NOISE(ForkInNewPidNamespace, CheckDoesNotReturnZero) { + if (!Credentials::CanCreateProcessInNewUserNS()) { + return; + } + + CHECK(sandbox::Credentials::MoveToNewUserNS()); + const pid_t pid = NamespaceSandbox::ForkInNewPidNamespace( + /*drop_capabilities_in_child=*/true); + CHECK_GE(pid, 0); + + if (pid == 0) { + CHECK(false); + _exit(kNormalExitCode); + } + + int status; + PCHECK(waitpid(pid, &status, 0) == pid); + if (WIFEXITED(status)) { + CHECK_NE(kNormalExitCode, WEXITSTATUS(status)); + } +} + +SANDBOX_TEST(ForkInNewPidNamespace, BasicUsage) { + if (!Credentials::CanCreateProcessInNewUserNS()) { + return; + } + + CHECK(sandbox::Credentials::MoveToNewUserNS()); + const pid_t pid = NamespaceSandbox::ForkInNewPidNamespace( + /*drop_capabilities_in_child=*/true); + CHECK_GE(pid, 0); + + if (pid == 0) { + CHECK_EQ(1, getpid()); + CHECK(!Credentials::HasAnyCapability()); + _exit(kNormalExitCode); + } + + int status; + PCHECK(waitpid(pid, &status, 0) == pid); + CHECK(WIFEXITED(status)); + CHECK_EQ(kNormalExitCode, WEXITSTATUS(status)); +} + +SANDBOX_TEST(ForkInNewPidNamespace, ExitWithSignal) { + if (!Credentials::CanCreateProcessInNewUserNS()) { + return; + } + + CHECK(sandbox::Credentials::MoveToNewUserNS()); + const pid_t pid = NamespaceSandbox::ForkInNewPidNamespace( + /*drop_capabilities_in_child=*/true); + CHECK_GE(pid, 0); + + if (pid == 0) { + CHECK_EQ(1, getpid()); + CHECK(!Credentials::HasAnyCapability()); + CHECK(NamespaceSandbox::InstallTerminationSignalHandler( + SIGTERM, kSignalTerminationExitCode)); + while (true) { + raise(SIGTERM); + } + } + + int status; + PCHECK(waitpid(pid, &status, 0) == pid); + CHECK(WIFEXITED(status)); + CHECK_EQ(kSignalTerminationExitCode, WEXITSTATUS(status)); +} + +volatile sig_atomic_t signal_handler_called; +void ExitSuccessfully(int sig) { + signal_handler_called = 1; +} + +SANDBOX_TEST(InstallTerminationSignalHandler, DoesNotOverrideExistingHandlers) { + struct sigaction action = {}; + action.sa_handler = &ExitSuccessfully; + PCHECK(sigaction(SIGUSR1, &action, nullptr) == 0); + + NamespaceSandbox::InstallDefaultTerminationSignalHandlers(); + CHECK(!NamespaceSandbox::InstallTerminationSignalHandler( + SIGUSR1, kSignalTerminationExitCode)); + + raise(SIGUSR1); + CHECK_EQ(1, signal_handler_called); +} + +} // namespace + +} // namespace sandbox diff --git a/sandbox/linux/services/namespace_utils.cc b/sandbox/linux/services/namespace_utils.cc new file mode 100644 index 0000000000..29b649c078 --- /dev/null +++ b/sandbox/linux/services/namespace_utils.cc @@ -0,0 +1,117 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/namespace_utils.h" + +#include <fcntl.h> +#include <sched.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include <string> + +#include "base/files/file_path.h" +#include "base/files/file_util.h" +#include "base/files/scoped_file.h" +#include "base/logging.h" +#include "base/posix/eintr_wrapper.h" +#include "base/process/launch.h" +#include "base/strings/safe_sprintf.h" +#include "third_party/valgrind/valgrind.h" + +namespace sandbox { + +namespace { +bool IsRunningOnValgrind() { + return RUNNING_ON_VALGRIND; +} + +const char kProcSelfSetgroups[] = "/proc/self/setgroups"; +} // namespace + +// static +bool NamespaceUtils::WriteToIdMapFile(const char* map_file, generic_id_t id) { + // This function needs to be async-signal-safe, as it may be called in between + // fork and exec. + + int fd = HANDLE_EINTR(open(map_file, O_WRONLY)); + if (fd == -1) { + return false; + } + + const generic_id_t inside_id = id; + const generic_id_t outside_id = id; + + char mapping[64]; + const ssize_t len = + base::strings::SafeSPrintf(mapping, "%d %d 1\n", inside_id, outside_id); + const ssize_t rc = HANDLE_EINTR(write(fd, mapping, len)); + RAW_CHECK(IGNORE_EINTR(close(fd)) == 0); + return rc == len; +} + +// static +bool NamespaceUtils::KernelSupportsUnprivilegedNamespace(int type) { + // Valgrind will let clone(2) pass-through, but doesn't support unshare(), + // so always consider namespaces unsupported there. + if (IsRunningOnValgrind()) { + return false; + } + + // As of Linux 3.8, /proc/self/ns/* files exist for all namespace types. Since + // user namespaces were added in 3.8, it is OK to rely on the existence of + // /proc/self/ns/*. + if (!base::PathExists(base::FilePath("/proc/self/ns/user"))) { + return false; + } + + const char* path; + switch (type) { + case CLONE_NEWUSER: + return true; + case CLONE_NEWIPC: + path = "/proc/self/ns/ipc"; + break; + case CLONE_NEWNET: + path = "/proc/self/ns/net"; + break; + case CLONE_NEWNS: + path = "/proc/self/ns/mnt"; + break; + case CLONE_NEWPID: + path = "/proc/self/ns/pid"; + break; + case CLONE_NEWUTS: + path = "/proc/self/ns/uts"; + break; + default: + NOTREACHED(); + return false; + } + + return base::PathExists(base::FilePath(path)); +} + +// static +bool NamespaceUtils::KernelSupportsDenySetgroups() { + return base::PathExists(base::FilePath(kProcSelfSetgroups)); +} + +// static +bool NamespaceUtils::DenySetgroups() { + // This function needs to be async-signal-safe. + int fd = HANDLE_EINTR(open(kProcSelfSetgroups, O_WRONLY)); + if (fd == -1) { + return false; + } + + static const char kDeny[] = "deny"; + const ssize_t len = sizeof(kDeny) - 1; + const ssize_t rc = HANDLE_EINTR(write(fd, kDeny, len)); + RAW_CHECK(IGNORE_EINTR(close(fd)) == 0); + return rc == len; +} + +} // namespace sandbox diff --git a/sandbox/linux/services/namespace_utils.h b/sandbox/linux/services/namespace_utils.h new file mode 100644 index 0000000000..f3c88a9452 --- /dev/null +++ b/sandbox/linux/services/namespace_utils.h @@ -0,0 +1,53 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SERVICES_NAMESPACE_UTILS_H_ +#define SANDBOX_LINUX_SERVICES_NAMESPACE_UTILS_H_ + +#include <sys/types.h> + +#include "base/compiler_specific.h" +#include "base/macros.h" +#include "base/template_util.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { + +// Utility functions for using Linux namepaces. +class SANDBOX_EXPORT NamespaceUtils { + public: + COMPILE_ASSERT((base::is_same<uid_t, gid_t>::value), UidAndGidAreSameType); + // generic_id_t can be used for either uid_t or gid_t. + typedef uid_t generic_id_t; + + // Write a uid or gid mapping from |id| to |id| in |map_file|. This function + // is async-signal-safe. + static bool WriteToIdMapFile(const char* map_file, + generic_id_t id) WARN_UNUSED_RESULT; + + // Returns true if unprivileged namespaces of type |type| is supported + // (meaning that both CLONE_NEWUSER and type are are supported). |type| must + // be one of CLONE_NEWIPC, CLONE_NEWNET, CLONE_NEWNS, CLONE_NEWPID, + // CLONE_NEWUSER, or CLONE_NEWUTS. This relies on access to /proc, so it will + // not work from within a sandbox. + static bool KernelSupportsUnprivilegedNamespace(int type); + + // Returns true if the kernel supports denying setgroups in a user namespace. + // On kernels where this is supported, DenySetgroups must be called before a + // gid mapping can be added. + static bool KernelSupportsDenySetgroups(); + + // Disables setgroups() within the current user namespace. On Linux 3.18.2 and + // later, this is required in order to write to /proc/self/gid_map without + // having CAP_SETGID. Callers can determine whether is this needed with + // KernelSupportsDenySetgroups. This function is async-signal-safe. + static bool DenySetgroups() WARN_UNUSED_RESULT; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(NamespaceUtils); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SERVICES_NAMESPACE_UTILS_H_ diff --git a/sandbox/linux/services/namespace_utils_unittest.cc b/sandbox/linux/services/namespace_utils_unittest.cc new file mode 100644 index 0000000000..41ed7e89a6 --- /dev/null +++ b/sandbox/linux/services/namespace_utils_unittest.cc @@ -0,0 +1,72 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/namespace_utils.h" + +#include <errno.h> +#include <sched.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include "base/logging.h" +#include "base/posix/eintr_wrapper.h" +#include "base/process/launch.h" +#include "sandbox/linux/services/credentials.h" +#include "sandbox/linux/tests/unit_tests.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace sandbox { + +namespace { + +SANDBOX_TEST(NamespaceUtils, KernelSupportsUnprivilegedNamespace) { + const bool can_create_user_ns = Credentials::CanCreateProcessInNewUserNS(); + const bool supports_user_ns = + NamespaceUtils::KernelSupportsUnprivilegedNamespace(CLONE_NEWUSER); + // can_create_user_ns implies supports_user_ns, but the converse is not + // necessarily true, as creating a user namespace can fail for various + // reasons. + if (can_create_user_ns) { + SANDBOX_ASSERT(supports_user_ns); + } +} + +SANDBOX_TEST(NamespaceUtils, WriteToIdMapFile) { + if (!Credentials::CanCreateProcessInNewUserNS()) { + return; + } + + const uid_t uid = getuid(); + const gid_t gid = getgid(); + + const bool supports_deny_setgroups = + NamespaceUtils::KernelSupportsDenySetgroups(); + + const pid_t pid = + base::ForkWithFlags(CLONE_NEWUSER | SIGCHLD, nullptr, nullptr); + ASSERT_NE(-1, pid); + if (pid == 0) { + if (supports_deny_setgroups) { + RAW_CHECK(NamespaceUtils::DenySetgroups()); + } + + RAW_CHECK(getuid() != uid); + RAW_CHECK(NamespaceUtils::WriteToIdMapFile("/proc/self/uid_map", uid)); + RAW_CHECK(getuid() == uid); + + RAW_CHECK(getgid() != gid); + RAW_CHECK(NamespaceUtils::WriteToIdMapFile("/proc/self/gid_map", gid)); + RAW_CHECK(getgid() == gid); + + _exit(0); + } + + int status = 42; + SANDBOX_ASSERT_EQ(pid, HANDLE_EINTR(waitpid(pid, &status, 0))); + SANDBOX_ASSERT_EQ(0, status); +} + +} // namespace. + +} // namespace sandbox. diff --git a/sandbox/linux/services/proc_util.cc b/sandbox/linux/services/proc_util.cc new file mode 100644 index 0000000000..d3f755f9a1 --- /dev/null +++ b/sandbox/linux/services/proc_util.cc @@ -0,0 +1,119 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/proc_util.h" + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "base/logging.h" +#include "base/memory/scoped_ptr.h" +#include "base/posix/eintr_wrapper.h" +#include "base/strings/string_number_conversions.h" + +namespace sandbox { +namespace { + +struct DIRCloser { + void operator()(DIR* d) const { + DCHECK(d); + PCHECK(0 == closedir(d)); + } +}; + +typedef scoped_ptr<DIR, DIRCloser> ScopedDIR; + +base::ScopedFD OpenDirectory(const char* path) { + DCHECK(path); + base::ScopedFD directory_fd( + HANDLE_EINTR(open(path, O_RDONLY | O_DIRECTORY | O_CLOEXEC))); + PCHECK(directory_fd.is_valid()); + return directory_fd.Pass(); +} + +} // namespace + +int ProcUtil::CountOpenFds(int proc_fd) { + DCHECK_LE(0, proc_fd); + int proc_self_fd = HANDLE_EINTR( + openat(proc_fd, "self/fd/", O_DIRECTORY | O_RDONLY | O_CLOEXEC)); + PCHECK(0 <= proc_self_fd); + + // Ownership of proc_self_fd is transferred here, it must not be closed + // or modified afterwards except via dir. + ScopedDIR dir(fdopendir(proc_self_fd)); + CHECK(dir); + + int count = 0; + struct dirent e; + struct dirent* de; + while (!readdir_r(dir.get(), &e, &de) && de) { + if (strcmp(e.d_name, ".") == 0 || strcmp(e.d_name, "..") == 0) { + continue; + } + + int fd_num; + CHECK(base::StringToInt(e.d_name, &fd_num)); + if (fd_num == proc_fd || fd_num == proc_self_fd) { + continue; + } + + ++count; + } + return count; +} + +bool ProcUtil::HasOpenDirectory(int proc_fd) { + DCHECK_LE(0, proc_fd); + int proc_self_fd = + openat(proc_fd, "self/fd/", O_DIRECTORY | O_RDONLY | O_CLOEXEC); + + PCHECK(0 <= proc_self_fd); + + // Ownership of proc_self_fd is transferred here, it must not be closed + // or modified afterwards except via dir. + ScopedDIR dir(fdopendir(proc_self_fd)); + CHECK(dir); + + struct dirent e; + struct dirent* de; + while (!readdir_r(dir.get(), &e, &de) && de) { + if (strcmp(e.d_name, ".") == 0 || strcmp(e.d_name, "..") == 0) { + continue; + } + + int fd_num; + CHECK(base::StringToInt(e.d_name, &fd_num)); + if (fd_num == proc_fd || fd_num == proc_self_fd) { + continue; + } + + struct stat s; + // It's OK to use proc_self_fd here, fstatat won't modify it. + CHECK(fstatat(proc_self_fd, e.d_name, &s, 0) == 0); + if (S_ISDIR(s.st_mode)) { + return true; + } + } + + // No open unmanaged directories found. + return false; +} + +bool ProcUtil::HasOpenDirectory() { + base::ScopedFD proc_fd( + HANDLE_EINTR(open("/proc/", O_DIRECTORY | O_RDONLY | O_CLOEXEC))); + return HasOpenDirectory(proc_fd.get()); +} + +//static +base::ScopedFD ProcUtil::OpenProc() { + return OpenDirectory("/proc/"); +} + +} // namespace sandbox diff --git a/sandbox/linux/services/proc_util.h b/sandbox/linux/services/proc_util.h new file mode 100644 index 0000000000..bc14c5ef2a --- /dev/null +++ b/sandbox/linux/services/proc_util.h @@ -0,0 +1,42 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SERVICES_PROC_UTIL_H_ +#define SANDBOX_LINUX_SERVICES_PROC_UTIL_H_ + +#include "base/files/scoped_file.h" +#include "base/macros.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { + +class SANDBOX_EXPORT ProcUtil { + public: + // Returns the number of file descriptors in the current process's FD + // table, excluding |proc_fd|, which should be a file descriptor for + // /proc/. + static int CountOpenFds(int proc_fd); + + // Checks whether the current process has any directory file descriptor open. + // Directory file descriptors are "capabilities" that would let a process use + // system calls such as openat() to bypass restrictions such as + // DropFileSystemAccess(). + // Sometimes it's useful to call HasOpenDirectory() after file system access + // has been dropped. In this case, |proc_fd| should be a file descriptor to + // /proc/. The file descriptor in |proc_fd| will be ignored by + // HasOpenDirectory() and remains owned by the caller. It is very important + // for the caller to close it. + static bool HasOpenDirectory(int proc_fd) WARN_UNUSED_RESULT; + static bool HasOpenDirectory() WARN_UNUSED_RESULT; + + // Open /proc/ or crash if not possible. + static base::ScopedFD OpenProc(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ProcUtil); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SERVICES_PROC_UTIL_H_ diff --git a/sandbox/linux/services/proc_util_unittest.cc b/sandbox/linux/services/proc_util_unittest.cc new file mode 100644 index 0000000000..bf25151956 --- /dev/null +++ b/sandbox/linux/services/proc_util_unittest.cc @@ -0,0 +1,62 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/proc_util.h" + +#include <fcntl.h> +#include <unistd.h> + +#include "base/files/scoped_file.h" +#include "base/posix/eintr_wrapper.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace sandbox { + +TEST(ProcUtil, CountOpenFds) { + base::ScopedFD proc_fd(open("/proc/", O_RDONLY | O_DIRECTORY)); + ASSERT_TRUE(proc_fd.is_valid()); + int fd_count = ProcUtil::CountOpenFds(proc_fd.get()); + int fd = open("/dev/null", O_RDONLY); + ASSERT_LE(0, fd); + EXPECT_EQ(fd_count + 1, ProcUtil::CountOpenFds(proc_fd.get())); + ASSERT_EQ(0, IGNORE_EINTR(close(fd))); + EXPECT_EQ(fd_count, ProcUtil::CountOpenFds(proc_fd.get())); +} + +TEST(ProcUtil, HasOpenDirectory) { + // No open directory should exist at startup. + EXPECT_FALSE(ProcUtil::HasOpenDirectory()); + { + // Have a "/proc" file descriptor around. + int proc_fd = open("/proc/", O_RDONLY | O_DIRECTORY); + base::ScopedFD proc_fd_closer(proc_fd); + EXPECT_TRUE(ProcUtil::HasOpenDirectory()); + } + EXPECT_FALSE(ProcUtil::HasOpenDirectory()); +} + +TEST(ProcUtil, HasOpenDirectoryWithFD) { + int proc_fd = open("/proc/", O_RDONLY | O_DIRECTORY); + base::ScopedFD proc_fd_closer(proc_fd); + ASSERT_LE(0, proc_fd); + + // Don't pass |proc_fd|, an open directory (proc_fd) should + // be detected. + EXPECT_TRUE(ProcUtil::HasOpenDirectory()); + // Pass |proc_fd| and no open directory should be detected. + EXPECT_FALSE(ProcUtil::HasOpenDirectory(proc_fd)); + + { + // Have a directory file descriptor around. + int open_directory_fd = open("/proc/self/", O_RDONLY | O_DIRECTORY); + base::ScopedFD open_directory_fd_closer(open_directory_fd); + EXPECT_TRUE(ProcUtil::HasOpenDirectory(proc_fd)); + } + + // The "/proc/" file descriptor should now be closed, |proc_fd| is the + // only directory file descriptor open. + EXPECT_FALSE(ProcUtil::HasOpenDirectory(proc_fd)); +} + +} // namespace sandbox diff --git a/sandbox/linux/services/resource_limits.cc b/sandbox/linux/services/resource_limits.cc new file mode 100644 index 0000000000..1ec11295d1 --- /dev/null +++ b/sandbox/linux/services/resource_limits.cc @@ -0,0 +1,26 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/resource_limits.h" + +#include <sys/resource.h> +#include <sys/time.h> + +#include <algorithm> + +namespace sandbox { + +// static +bool ResourceLimits::Lower(int resource, rlim_t limit) { + struct rlimit old_rlimit; + if (getrlimit(resource, &old_rlimit)) + return false; + // Make sure we don't raise the existing limit. + const struct rlimit new_rlimit = {std::min(old_rlimit.rlim_cur, limit), + std::min(old_rlimit.rlim_max, limit)}; + int rc = setrlimit(resource, &new_rlimit); + return rc == 0; +} + +} // namespace sandbox diff --git a/sandbox/linux/services/resource_limits.h b/sandbox/linux/services/resource_limits.h new file mode 100644 index 0000000000..3464dab679 --- /dev/null +++ b/sandbox/linux/services/resource_limits.h @@ -0,0 +1,29 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SERVICES_RESOURCE_LIMITS_H_ +#define SANDBOX_LINUX_SERVICES_RESOURCE_LIMITS_H_ + +#include <sys/resource.h> + +#include "base/compiler_specific.h" +#include "base/macros.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { + +// This class provides a small wrapper around setrlimit(). +class SANDBOX_EXPORT ResourceLimits { + public: + // Lower the soft and hard limit of |resource| to |limit|. If the current + // limit is lower than |limit|, keep it. + static bool Lower(int resource, rlim_t limit) WARN_UNUSED_RESULT; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ResourceLimits); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SERVICES_RESOURCE_LIMITS_H_ diff --git a/sandbox/linux/services/resource_limits_unittests.cc b/sandbox/linux/services/resource_limits_unittests.cc new file mode 100644 index 0000000000..910c740f7b --- /dev/null +++ b/sandbox/linux/services/resource_limits_unittests.cc @@ -0,0 +1,43 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/resource_limits.h" + +#include <errno.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <unistd.h> + +#include "base/logging.h" +#include "sandbox/linux/tests/test_utils.h" +#include "sandbox/linux/tests/unit_tests.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace sandbox { + +namespace { + +// Fails on Android: crbug.com/459158 +#if !defined(OS_ANDROID) +#define MAYBE_NoFork DISABLE_ON_ASAN(NoFork) +#else +#define MAYBE_NoFork DISABLED_NoFork +#endif // OS_ANDROID + +// Not being able to fork breaks LeakSanitizer, so disable on +// all ASAN builds. +SANDBOX_TEST(ResourceLimits, MAYBE_NoFork) { + // Make sure that fork will fail with EAGAIN. + SANDBOX_ASSERT(ResourceLimits::Lower(RLIMIT_NPROC, 0)); + errno = 0; + pid_t pid = fork(); + // Reap any child if fork succeeded. + TestUtils::HandlePostForkReturn(pid); + SANDBOX_ASSERT_EQ(-1, pid); + CHECK_EQ(EAGAIN, errno); +} + +} // namespace + +} // namespace sandbox diff --git a/sandbox/linux/services/scoped_process.cc b/sandbox/linux/services/scoped_process.cc new file mode 100644 index 0000000000..65af4873a4 --- /dev/null +++ b/sandbox/linux/services/scoped_process.cc @@ -0,0 +1,119 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/scoped_process.h" + +#include <fcntl.h> +#include <signal.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "base/callback.h" +#include "base/logging.h" +#include "base/posix/eintr_wrapper.h" +#include "build/build_config.h" +#include "sandbox/linux/services/syscall_wrappers.h" +#include "sandbox/linux/services/thread_helpers.h" + +namespace sandbox { + +namespace { + +const char kSynchronisationChar[] = "D"; + +void WaitForever() { + while(true) { + pause(); + } +} + +} // namespace + +ScopedProcess::ScopedProcess(const base::Closure& child_callback) + : child_process_id_(-1), process_id_(getpid()) { + PCHECK(0 == pipe(pipe_fds_)); +#if !defined(THREAD_SANITIZER) + // Make sure that we can safely fork(). + CHECK(ThreadHelpers::IsSingleThreaded()); +#endif + child_process_id_ = fork(); + PCHECK(0 <= child_process_id_); + + if (0 == child_process_id_) { + PCHECK(0 == IGNORE_EINTR(close(pipe_fds_[0]))); + pipe_fds_[0] = -1; + child_callback.Run(); + // Notify the parent that the closure has run. + CHECK_EQ(1, HANDLE_EINTR(write(pipe_fds_[1], kSynchronisationChar, 1))); + WaitForever(); + NOTREACHED(); + _exit(1); + } + + PCHECK(0 == IGNORE_EINTR(close(pipe_fds_[1]))); + pipe_fds_[1] = -1; +} + +ScopedProcess::~ScopedProcess() { + CHECK(IsOriginalProcess()); + if (child_process_id_ >= 0) { + PCHECK(0 == kill(child_process_id_, SIGKILL)); + siginfo_t process_info; + + PCHECK(0 == HANDLE_EINTR( + waitid(P_PID, child_process_id_, &process_info, WEXITED))); + } + if (pipe_fds_[0] >= 0) { + PCHECK(0 == IGNORE_EINTR(close(pipe_fds_[0]))); + } + if (pipe_fds_[1] >= 0) { + PCHECK(0 == IGNORE_EINTR(close(pipe_fds_[1]))); + } +} + +int ScopedProcess::WaitForExit(bool* got_signaled) { + DCHECK(got_signaled); + CHECK(IsOriginalProcess()); + siginfo_t process_info; + // WNOWAIT to make sure that the destructor can wait on the child. + int ret = HANDLE_EINTR( + waitid(P_PID, child_process_id_, &process_info, WEXITED | WNOWAIT)); + PCHECK(0 == ret) << "Did something else wait on the child?"; + + if (process_info.si_code == CLD_EXITED) { + *got_signaled = false; + } else if (process_info.si_code == CLD_KILLED || + process_info.si_code == CLD_DUMPED) { + *got_signaled = true; + } else { + CHECK(false) << "ScopedProcess needs to be extended for si_code " + << process_info.si_code; + } + return process_info.si_status; +} + +bool ScopedProcess::WaitForClosureToRun() { + char c = 0; + int ret = HANDLE_EINTR(read(pipe_fds_[0], &c, 1)); + PCHECK(ret >= 0); + if (0 == ret) + return false; + + CHECK_EQ(c, kSynchronisationChar[0]); + return true; +} + +// It would be problematic if after a fork(), another process would start using +// this object. +// This method allows to assert it is not happening. +bool ScopedProcess::IsOriginalProcess() { + // Make a direct syscall to bypass glibc caching of PIDs. + pid_t pid = sys_getpid(); + return pid == process_id_; +} + +} // namespace sandbox diff --git a/sandbox/linux/services/scoped_process.h b/sandbox/linux/services/scoped_process.h new file mode 100644 index 0000000000..bddbd5529b --- /dev/null +++ b/sandbox/linux/services/scoped_process.h @@ -0,0 +1,55 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SERVICES_SCOPED_PROCESS_H_ +#define SANDBOX_LINUX_SERVICES_SCOPED_PROCESS_H_ + +#include "base/callback_forward.h" +#include "base/macros.h" +#include "base/process/process_handle.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { + +// fork() a child process that will run a Closure. +// After the Closure has run, the child will pause forever. If this object +// is detroyed, the child will be destroyed, even if the closure did not +// finish running. It's ok to signal the child from outside of this class to +// destroy it. +// This class cannot be instanciated from a multi-threaded process, as it needs +// to fork(). +class SANDBOX_EXPORT ScopedProcess { + public: + // A new process will be created and |child_callback| will run in the child + // process. This callback is allowed to terminate the process or to simply + // return. If the callback returns, the process will wait forever. + explicit ScopedProcess(const base::Closure& child_callback); + ~ScopedProcess(); + + // Wait for the process to exit. + // |got_signaled| tells how to interpret the return value: either as an exit + // code, or as a signal number. + // When this returns, the process will still not have been reaped and will + // survive as a zombie for the lifetime of this object. This method can be + // called multiple times. + int WaitForExit(bool* got_signaled); + + // Wait for the |child_callback| passed at construction to run. Return false + // if |child_callback| did not finish running and we know it never will (for + // instance the child crashed or used _exit()). + bool WaitForClosureToRun(); + base::ProcessId GetPid() { return child_process_id_; } + + private: + bool IsOriginalProcess(); + + base::ProcessId child_process_id_; + base::ProcessId process_id_; + int pipe_fds_[2]; + DISALLOW_COPY_AND_ASSIGN(ScopedProcess); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SERVICES_SCOPED_PROCESS_H_ diff --git a/sandbox/linux/services/scoped_process_unittest.cc b/sandbox/linux/services/scoped_process_unittest.cc new file mode 100644 index 0000000000..8bd2847997 --- /dev/null +++ b/sandbox/linux/services/scoped_process_unittest.cc @@ -0,0 +1,130 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/scoped_process.h" + +#include <errno.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "base/bind.h" +#include "base/callback.h" +#include "base/files/file_util.h" +#include "base/files/scoped_file.h" +#include "base/logging.h" +#include "base/posix/eintr_wrapper.h" +#include "base/threading/platform_thread.h" +#include "base/time/time.h" +#include "sandbox/linux/tests/unit_tests.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace sandbox { + +namespace { + +void DoExit() { _exit(0); } + +void ExitWithCode(int exit_code) { _exit(exit_code); } + +void RaiseAndExit(int signal) { + PCHECK(0 == raise(signal)); + _exit(0); +} + +void DoNothing() {} + +TEST(ScopedProcess, ScopedProcessNormalExit) { + const int kCustomExitCode = 12; + ScopedProcess process(base::Bind(&ExitWithCode, kCustomExitCode)); + bool got_signaled = true; + int exit_code = process.WaitForExit(&got_signaled); + EXPECT_FALSE(got_signaled); + EXPECT_EQ(kCustomExitCode, exit_code); + + // Verify that WaitForExit() can be called multiple times on the same + // process. + bool got_signaled2 = true; + int exit_code2 = process.WaitForExit(&got_signaled2); + EXPECT_FALSE(got_signaled2); + EXPECT_EQ(kCustomExitCode, exit_code2); +} + +// Disable this test on Android, SIGABRT is funky there. +TEST(ScopedProcess, DISABLE_ON_ANDROID(ScopedProcessAbort)) { + PCHECK(SIG_ERR != signal(SIGABRT, SIG_DFL)); + ScopedProcess process(base::Bind(&RaiseAndExit, SIGABRT)); + bool got_signaled = false; + int exit_code = process.WaitForExit(&got_signaled); + EXPECT_TRUE(got_signaled); + EXPECT_EQ(SIGABRT, exit_code); +} + +TEST(ScopedProcess, ScopedProcessSignaled) { + ScopedProcess process(base::Bind(&DoNothing)); + bool got_signaled = false; + ASSERT_EQ(0, kill(process.GetPid(), SIGKILL)); + int exit_code = process.WaitForExit(&got_signaled); + EXPECT_TRUE(got_signaled); + EXPECT_EQ(SIGKILL, exit_code); +} + +TEST(ScopedProcess, DiesForReal) { + int pipe_fds[2]; + ASSERT_EQ(0, pipe(pipe_fds)); + base::ScopedFD read_end_closer(pipe_fds[0]); + base::ScopedFD write_end_closer(pipe_fds[1]); + + { ScopedProcess process(base::Bind(&DoExit)); } + + // Close writing end of the pipe. + write_end_closer.reset(); + pipe_fds[1] = -1; + + ASSERT_EQ(0, fcntl(pipe_fds[0], F_SETFL, O_NONBLOCK)); + char c; + // If the child process is dead for real, there will be no writing end + // for this pipe left and read will EOF instead of returning EWOULDBLOCK. + ASSERT_EQ(0, read(pipe_fds[0], &c, 1)); +} + +TEST(ScopedProcess, SynchronizationBasic) { + ScopedProcess process1(base::Bind(&DoNothing)); + EXPECT_TRUE(process1.WaitForClosureToRun()); + + ScopedProcess process2(base::Bind(&DoExit)); + // The closure didn't finish running normally. This case is simple enough + // that process.WaitForClosureToRun() should return false, even though the + // API does not guarantees that it will return at all. + EXPECT_FALSE(process2.WaitForClosureToRun()); +} + +void SleepInMsAndWriteOneByte(int time_to_sleep, int fd) { + base::PlatformThread::Sleep(base::TimeDelta::FromMilliseconds(time_to_sleep)); + CHECK(1 == write(fd, "1", 1)); +} + +TEST(ScopedProcess, SynchronizationWorks) { + int pipe_fds[2]; + ASSERT_EQ(0, pipe(pipe_fds)); + base::ScopedFD read_end_closer(pipe_fds[0]); + base::ScopedFD write_end_closer(pipe_fds[1]); + + // Start a process with a closure that takes a little bit to run. + ScopedProcess process( + base::Bind(&SleepInMsAndWriteOneByte, 100, pipe_fds[1])); + EXPECT_TRUE(process.WaitForClosureToRun()); + + // Verify that the closure did, indeed, run. + ASSERT_EQ(0, fcntl(pipe_fds[0], F_SETFL, O_NONBLOCK)); + char c = 0; + EXPECT_EQ(1, read(pipe_fds[0], &c, 1)); + EXPECT_EQ('1', c); +} + +} // namespace + +} // namespace sandbox diff --git a/sandbox/linux/services/syscall_wrappers.cc b/sandbox/linux/services/syscall_wrappers.cc new file mode 100644 index 0000000000..b6e87655a3 --- /dev/null +++ b/sandbox/linux/services/syscall_wrappers.cc @@ -0,0 +1,246 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/syscall_wrappers.h" + +#include <pthread.h> +#include <sched.h> +#include <setjmp.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> +#include <cstring> + +#include "base/compiler_specific.h" +#include "base/logging.h" +#include "build/build_config.h" +#include "sandbox/linux/system_headers/capability.h" +#include "sandbox/linux/system_headers/linux_signal.h" +#include "sandbox/linux/system_headers/linux_syscalls.h" +#include "third_party/valgrind/valgrind.h" + +namespace sandbox { + +pid_t sys_getpid(void) { + return syscall(__NR_getpid); +} + +pid_t sys_gettid(void) { + return syscall(__NR_gettid); +} + +long sys_clone(unsigned long flags, + decltype(nullptr) child_stack, + pid_t* ptid, + pid_t* ctid, + decltype(nullptr) tls) { + const bool clone_tls_used = flags & CLONE_SETTLS; + const bool invalid_ctid = + (flags & (CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)) && !ctid; + const bool invalid_ptid = (flags & CLONE_PARENT_SETTID) && !ptid; + + // We do not support CLONE_VM. + const bool clone_vm_used = flags & CLONE_VM; + if (clone_tls_used || invalid_ctid || invalid_ptid || clone_vm_used) { + RAW_LOG(FATAL, "Invalid usage of sys_clone"); + } + + if (ptid) MSAN_UNPOISON(ptid, sizeof(*ptid)); + if (ctid) MSAN_UNPOISON(ctid, sizeof(*ctid)); + // See kernel/fork.c in Linux. There is different ordering of sys_clone + // parameters depending on CONFIG_CLONE_BACKWARDS* configuration options. +#if defined(ARCH_CPU_X86_64) + return syscall(__NR_clone, flags, child_stack, ptid, ctid, tls); +#elif defined(ARCH_CPU_X86) || defined(ARCH_CPU_ARM_FAMILY) || \ + defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_MIPS64_FAMILY) + // CONFIG_CLONE_BACKWARDS defined. + return syscall(__NR_clone, flags, child_stack, ptid, tls, ctid); +#endif +} + +long sys_clone(unsigned long flags) { + return sys_clone(flags, nullptr, nullptr, nullptr, nullptr); +} + +void sys_exit_group(int status) { + syscall(__NR_exit_group, status); +} + +int sys_seccomp(unsigned int operation, + unsigned int flags, + const struct sock_fprog* args) { + return syscall(__NR_seccomp, operation, flags, args); +} + +int sys_prlimit64(pid_t pid, + int resource, + const struct rlimit64* new_limit, + struct rlimit64* old_limit) { + int res = syscall(__NR_prlimit64, pid, resource, new_limit, old_limit); + if (res == 0 && old_limit) MSAN_UNPOISON(old_limit, sizeof(*old_limit)); + return res; +} + +int sys_capget(cap_hdr* hdrp, cap_data* datap) { + int res = syscall(__NR_capget, hdrp, datap); + if (res == 0) { + if (hdrp) MSAN_UNPOISON(hdrp, sizeof(*hdrp)); + if (datap) MSAN_UNPOISON(datap, sizeof(*datap)); + } + return res; +} + +int sys_capset(cap_hdr* hdrp, const cap_data* datap) { + return syscall(__NR_capset, hdrp, datap); +} + +int sys_getresuid(uid_t* ruid, uid_t* euid, uid_t* suid) { + int res; +#if defined(ARCH_CPU_X86) || defined(ARCH_CPU_ARMEL) + // On 32-bit x86 or 32-bit arm, getresuid supports 16bit values only. + // Use getresuid32 instead. + res = syscall(__NR_getresuid32, ruid, euid, suid); +#else + res = syscall(__NR_getresuid, ruid, euid, suid); +#endif + if (res == 0) { + if (ruid) MSAN_UNPOISON(ruid, sizeof(*ruid)); + if (euid) MSAN_UNPOISON(euid, sizeof(*euid)); + if (suid) MSAN_UNPOISON(suid, sizeof(*suid)); + } + return res; +} + +int sys_getresgid(gid_t* rgid, gid_t* egid, gid_t* sgid) { + int res; +#if defined(ARCH_CPU_X86) || defined(ARCH_CPU_ARMEL) + // On 32-bit x86 or 32-bit arm, getresgid supports 16bit values only. + // Use getresgid32 instead. + res = syscall(__NR_getresgid32, rgid, egid, sgid); +#else + res = syscall(__NR_getresgid, rgid, egid, sgid); +#endif + if (res == 0) { + if (rgid) MSAN_UNPOISON(rgid, sizeof(*rgid)); + if (egid) MSAN_UNPOISON(egid, sizeof(*egid)); + if (sgid) MSAN_UNPOISON(sgid, sizeof(*sgid)); + } + return res; +} + +int sys_chroot(const char* path) { + return syscall(__NR_chroot, path); +} + +int sys_unshare(int flags) { + return syscall(__NR_unshare, flags); +} + +int sys_sigprocmask(int how, const sigset_t* set, decltype(nullptr) oldset) { + // In some toolchain (in particular Android and PNaCl toolchain), + // sigset_t is 32 bits, but Linux ABI requires 64 bits. + uint64_t linux_value = 0; + std::memcpy(&linux_value, set, std::min(sizeof(sigset_t), sizeof(uint64_t))); + return syscall(__NR_rt_sigprocmask, how, &linux_value, nullptr, + sizeof(linux_value)); +} + +#if (defined(MEMORY_SANITIZER) || defined(THREAD_SANITIZER) || \ + (defined(ARCH_CPU_X86_64) && !defined(__clang__))) && \ + !defined(OS_NACL_NONSFI) +// If MEMORY_SANITIZER or THREAD_SANITIZER is enabled, it is necessary to call +// sigaction() here, rather than the direct syscall (sys_sigaction() defined +// by ourselves). +// It is because, if MEMORY_SANITIZER or THREAD_SANITIZER is enabled, sigaction +// is wrapped, and |act->sa_handler| is injected in order to unpoisonize the +// memory passed via callback's arguments for MEMORY_SANITIZER, or handle +// signals to check thread consistency for THREAD_SANITIZER. Please see +// msan_interceptors.cc and tsan_interceptors.cc for more details. +// So, specifically, if MEMORY_SANITIZER is enabled while the direct syscall is +// used, as MEMORY_SANITIZER does not know about it, sigaction() invocation in +// other places would be broken (in more precise, returned |oldact| would have +// a broken |sa_handler| callback). +// Practically, it would break NaCl's signal handler installation. +// cf) native_client/src/trusted/service_runtime/linux/nacl_signal.c. +// As for THREAD_SANITIZER, the intercepted signal handlers are processed more +// in other libc functions' interceptors (such as for raise()), so that it +// would not work properly. +// +// Also on x86_64 architecture, we need naked function for rt_sigreturn. +// However, there is no simple way to define it with GCC. Note that the body +// of function is actually very small (only two instructions), but we need to +// define much debug information in addition, otherwise backtrace() used by +// base::StackTrace would not work so that some tests would fail. +// +// When this is built with PNaCl toolchain, we should always use sys_sigaction +// below, because sigaction() provided by the toolchain is incompatible with +// Linux's ABI. So, otherwise, it would just fail. Note that it is not +// necessary to think about sigaction() invocation in other places even with +// MEMORY_SANITIZER or THREAD_SANITIZER, because it would just fail there. +int sys_sigaction(int signum, + const struct sigaction* act, + struct sigaction* oldact) { + return sigaction(signum, act, oldact); +} +#else +// struct sigaction is different ABI from the Linux's. +struct KernelSigAction { + void (*kernel_handler)(int); + uint32_t sa_flags; + void (*sa_restorer)(void); + uint64_t sa_mask; +}; + +// On X86_64 arch, it is necessary to set sa_restorer always. +#if defined(ARCH_CPU_X86_64) +#if !defined(SA_RESTORER) +#define SA_RESTORER 0x04000000 +#endif + +// rt_sigreturn is a special system call that interacts with the user land +// stack. Thus, here prologue must not be created, which implies syscall() +// does not work properly, too. Note that rt_sigreturn will never return. +static __attribute__((naked)) void sys_rt_sigreturn() { + // Just invoke rt_sigreturn system call. + asm volatile ("syscall\n" + :: "a"(__NR_rt_sigreturn)); +} +#endif + +int sys_sigaction(int signum, + const struct sigaction* act, + struct sigaction* oldact) { + KernelSigAction kernel_act = {}; + if (act) { + kernel_act.kernel_handler = act->sa_handler; + std::memcpy(&kernel_act.sa_mask, &act->sa_mask, + std::min(sizeof(kernel_act.sa_mask), sizeof(act->sa_mask))); + kernel_act.sa_flags = act->sa_flags; + +#if defined(ARCH_CPU_X86_64) + if (!(kernel_act.sa_flags & SA_RESTORER)) { + kernel_act.sa_flags |= SA_RESTORER; + kernel_act.sa_restorer = sys_rt_sigreturn; + } +#endif + } + + KernelSigAction kernel_oldact = {}; + int result = syscall(__NR_rt_sigaction, signum, act ? &kernel_act : nullptr, + oldact ? &kernel_oldact : nullptr, sizeof(uint64_t)); + if (result == 0 && oldact) { + oldact->sa_handler = kernel_oldact.kernel_handler; + sigemptyset(&oldact->sa_mask); + std::memcpy(&oldact->sa_mask, &kernel_oldact.sa_mask, + std::min(sizeof(kernel_act.sa_mask), sizeof(act->sa_mask))); + oldact->sa_flags = kernel_oldact.sa_flags; + } + return result; +} + +#endif // defined(MEMORY_SANITIZER) + +} // namespace sandbox diff --git a/sandbox/linux/services/syscall_wrappers.h b/sandbox/linux/services/syscall_wrappers.h new file mode 100644 index 0000000000..581425a367 --- /dev/null +++ b/sandbox/linux/services/syscall_wrappers.h @@ -0,0 +1,83 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SERVICES_SYSCALL_WRAPPERS_H_ +#define SANDBOX_LINUX_SERVICES_SYSCALL_WRAPPERS_H_ + +#include <signal.h> +#include <stdint.h> +#include <sys/types.h> + +#include "sandbox/sandbox_export.h" + +struct sock_fprog; +struct rlimit64; +struct cap_hdr; +struct cap_data; + +namespace sandbox { + +// Provide direct system call wrappers for a few common system calls. +// These are guaranteed to perform a system call and do not rely on things such +// as caching the current pid (c.f. getpid()) unless otherwise specified. + +SANDBOX_EXPORT pid_t sys_getpid(void); + +SANDBOX_EXPORT pid_t sys_gettid(void); + +SANDBOX_EXPORT long sys_clone(unsigned long flags); + +// |regs| is not supported and must be passed as nullptr. |child_stack| must be +// nullptr, since otherwise this function cannot safely return. As a +// consequence, this function does not support CLONE_VM. +SANDBOX_EXPORT long sys_clone(unsigned long flags, + decltype(nullptr) child_stack, + pid_t* ptid, + pid_t* ctid, + decltype(nullptr) regs); + +SANDBOX_EXPORT void sys_exit_group(int status); + +// The official system call takes |args| as void* (in order to be extensible), +// but add more typing for the cases that are currently used. +SANDBOX_EXPORT int sys_seccomp(unsigned int operation, + unsigned int flags, + const struct sock_fprog* args); + +// Some libcs do not expose a prlimit64 wrapper. +SANDBOX_EXPORT int sys_prlimit64(pid_t pid, + int resource, + const struct rlimit64* new_limit, + struct rlimit64* old_limit); + +// Some libcs do not expose capget/capset wrappers. We want to use these +// directly in order to avoid pulling in libcap2. +SANDBOX_EXPORT int sys_capget(struct cap_hdr* hdrp, struct cap_data* datap); +SANDBOX_EXPORT int sys_capset(struct cap_hdr* hdrp, + const struct cap_data* datap); + +// Some libcs do not expose getresuid/getresgid wrappers. +SANDBOX_EXPORT int sys_getresuid(uid_t* ruid, uid_t* euid, uid_t* suid); +SANDBOX_EXPORT int sys_getresgid(gid_t* rgid, gid_t* egid, gid_t* sgid); + +// Some libcs do not expose a chroot wrapper. +SANDBOX_EXPORT int sys_chroot(const char* path); + +// Some libcs do not expose a unshare wrapper. +SANDBOX_EXPORT int sys_unshare(int flags); + +// Some libcs do not expose a sigprocmask. Note that oldset must be a nullptr, +// because of some ABI gap between toolchain's and Linux's. +SANDBOX_EXPORT int sys_sigprocmask(int how, + const sigset_t* set, + decltype(nullptr) oldset); + +// Some libcs do not expose a sigaction(). +SANDBOX_EXPORT int sys_sigaction(int signum, + const struct sigaction* act, + struct sigaction* oldact); + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SERVICES_SYSCALL_WRAPPERS_H_ diff --git a/sandbox/linux/services/syscall_wrappers_unittest.cc b/sandbox/linux/services/syscall_wrappers_unittest.cc new file mode 100644 index 0000000000..249d9ae1da --- /dev/null +++ b/sandbox/linux/services/syscall_wrappers_unittest.cc @@ -0,0 +1,99 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/syscall_wrappers.h" + +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <cstring> + +#include "base/logging.h" +#include "base/posix/eintr_wrapper.h" +#include "build/build_config.h" +#include "sandbox/linux/system_headers/linux_signal.h" +#include "sandbox/linux/tests/test_utils.h" +#include "sandbox/linux/tests/unit_tests.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "third_party/valgrind/valgrind.h" + +namespace sandbox { + +namespace { + +TEST(SyscallWrappers, BasicSyscalls) { + EXPECT_EQ(getpid(), sys_getpid()); +} + +TEST(SyscallWrappers, CloneBasic) { + pid_t child = sys_clone(SIGCHLD); + TestUtils::HandlePostForkReturn(child); + EXPECT_LT(0, child); +} + +TEST(SyscallWrappers, CloneParentSettid) { + pid_t ptid = 0; + pid_t child = sys_clone(CLONE_PARENT_SETTID | SIGCHLD, nullptr, &ptid, + nullptr, nullptr); + TestUtils::HandlePostForkReturn(child); + EXPECT_LT(0, child); + EXPECT_EQ(child, ptid); +} + +TEST(SyscallWrappers, CloneChildSettid) { + pid_t ctid = 0; + pid_t pid = + sys_clone(CLONE_CHILD_SETTID | SIGCHLD, nullptr, nullptr, &ctid, nullptr); + + const int kSuccessExit = 0; + if (0 == pid) { + // In child. + if (sys_getpid() == ctid) + _exit(kSuccessExit); + _exit(1); + } + + ASSERT_NE(-1, pid); + int status = 0; + ASSERT_EQ(pid, HANDLE_EINTR(waitpid(pid, &status, 0))); + ASSERT_TRUE(WIFEXITED(status)); + EXPECT_EQ(kSuccessExit, WEXITSTATUS(status)); +} + +TEST(SyscallWrappers, GetRESUid) { + uid_t ruid, euid, suid; + uid_t sys_ruid, sys_euid, sys_suid; + ASSERT_EQ(0, getresuid(&ruid, &euid, &suid)); + ASSERT_EQ(0, sys_getresuid(&sys_ruid, &sys_euid, &sys_suid)); + EXPECT_EQ(ruid, sys_ruid); + EXPECT_EQ(euid, sys_euid); + EXPECT_EQ(suid, sys_suid); +} + +TEST(SyscallWrappers, GetRESGid) { + gid_t rgid, egid, sgid; + gid_t sys_rgid, sys_egid, sys_sgid; + ASSERT_EQ(0, getresgid(&rgid, &egid, &sgid)); + ASSERT_EQ(0, sys_getresgid(&sys_rgid, &sys_egid, &sys_sgid)); + EXPECT_EQ(rgid, sys_rgid); + EXPECT_EQ(egid, sys_egid); + EXPECT_EQ(sgid, sys_sgid); +} + +TEST(SyscallWrappers, LinuxSigSet) { + sigset_t sigset; + ASSERT_EQ(0, sigemptyset(&sigset)); + ASSERT_EQ(0, sigaddset(&sigset, LINUX_SIGSEGV)); + ASSERT_EQ(0, sigaddset(&sigset, LINUX_SIGBUS)); + uint64_t linux_sigset = 0; + std::memcpy(&linux_sigset, &sigset, + std::min(sizeof(sigset), sizeof(linux_sigset))); + EXPECT_EQ((1ULL << (LINUX_SIGSEGV - 1)) | (1ULL << (LINUX_SIGBUS - 1)), + linux_sigset); +} + +} // namespace + +} // namespace sandbox diff --git a/sandbox/linux/services/thread_helpers.cc b/sandbox/linux/services/thread_helpers.cc new file mode 100644 index 0000000000..80766a9bc5 --- /dev/null +++ b/sandbox/linux/services/thread_helpers.cc @@ -0,0 +1,157 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/thread_helpers.h" + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include <string> + +#include "base/bind.h" +#include "base/callback.h" +#include "base/files/scoped_file.h" +#include "base/logging.h" +#include "base/posix/eintr_wrapper.h" +#include "base/strings/string_number_conversions.h" +#include "base/threading/platform_thread.h" +#include "base/threading/thread.h" +#include "sandbox/linux/services/proc_util.h" + +namespace sandbox { + +namespace { + +const char kAssertSingleThreadedError[] = + "Current process is not mono-threaded!"; + +bool IsSingleThreadedImpl(int proc_fd) { + CHECK_LE(0, proc_fd); + struct stat task_stat; + int fstat_ret = fstatat(proc_fd, "self/task/", &task_stat, 0); + PCHECK(0 == fstat_ret); + + // At least "..", "." and the current thread should be present. + CHECK_LE(3UL, task_stat.st_nlink); + // Counting threads via /proc/self/task could be racy. For the purpose of + // determining if the current proces is monothreaded it works: if at any + // time it becomes monothreaded, it'll stay so. + return task_stat.st_nlink == 3; +} + +bool IsThreadPresentInProcFS(int proc_fd, + const std::string& thread_id_dir_str) { + struct stat task_stat; + const int fstat_ret = + fstatat(proc_fd, thread_id_dir_str.c_str(), &task_stat, 0); + if (fstat_ret < 0) { + PCHECK(ENOENT == errno); + return false; + } + return true; +} + +// Run |cb| in a loop until it returns false. Every time |cb| runs, sleep +// for an exponentially increasing amount of time. |cb| is expected to return +// false very quickly and this will crash if it doesn't happen within ~64ms on +// Debug builds (2s on Release builds). +// This is guaranteed to not sleep more than twice as much as the bare minimum +// amount of time. +void RunWhileTrue(const base::Callback<bool(void)>& cb) { +#if defined(NDEBUG) + // In Release mode, crash after 30 iterations, which means having spent + // roughly 2s in + // nanosleep(2) cumulatively. + const unsigned int kMaxIterations = 30U; +#else + // In practice, this never goes through more than a couple iterations. In + // debug mode, crash after 64ms (+ eventually 25 times the granularity of + // the clock) in nanosleep(2). This ensures that this is not becoming too + // slow. + const unsigned int kMaxIterations = 25U; +#endif + + // Run |cb| with an exponential back-off, sleeping 2^iterations nanoseconds + // in nanosleep(2). + // Note: the clock may not allow for nanosecond granularity, in this case the + // first iterations would sleep a tiny bit more instead, which would not + // change the calculations significantly. + for (unsigned int i = 0; i < kMaxIterations; ++i) { + if (!cb.Run()) { + return; + } + + // Increase the waiting time exponentially. + struct timespec ts = {0, 1L << i /* nanoseconds */}; + PCHECK(0 == HANDLE_EINTR(nanosleep(&ts, &ts))); + } + + LOG(FATAL) << kAssertSingleThreadedError << " (iterations: " << kMaxIterations + << ")"; + + NOTREACHED(); +} + +bool IsMultiThreaded(int proc_fd) { + return !ThreadHelpers::IsSingleThreaded(proc_fd); +} + +} // namespace + +// static +bool ThreadHelpers::IsSingleThreaded(int proc_fd) { + DCHECK_LE(0, proc_fd); + return IsSingleThreadedImpl(proc_fd); +} + +// static +bool ThreadHelpers::IsSingleThreaded() { + base::ScopedFD task_fd(ProcUtil::OpenProc()); + return IsSingleThreaded(task_fd.get()); +} + +// static +void ThreadHelpers::AssertSingleThreaded(int proc_fd) { + DCHECK_LE(0, proc_fd); + const base::Callback<bool(void)> cb = base::Bind(&IsMultiThreaded, proc_fd); + RunWhileTrue(cb); +} + +void ThreadHelpers::AssertSingleThreaded() { + base::ScopedFD task_fd(ProcUtil::OpenProc()); + AssertSingleThreaded(task_fd.get()); +} + +// static +bool ThreadHelpers::StopThreadAndWatchProcFS(int proc_fd, + base::Thread* thread) { + DCHECK_LE(0, proc_fd); + DCHECK(thread); + const base::PlatformThreadId thread_id = thread->thread_id(); + const std::string thread_id_dir_str = + "self/task/" + base::IntToString(thread_id) + "/"; + + // The kernel is at liberty to wake the thread id futex before updating + // /proc. Following Stop(), the thread is joined, but entries in /proc may + // not have been updated. + thread->Stop(); + + const base::Callback<bool(void)> cb = + base::Bind(&IsThreadPresentInProcFS, proc_fd, thread_id_dir_str); + + RunWhileTrue(cb); + + return true; +} + +// static +const char* ThreadHelpers::GetAssertSingleThreadedErrorMessageForTests() { + return kAssertSingleThreadedError; +} + +} // namespace sandbox diff --git a/sandbox/linux/services/thread_helpers.h b/sandbox/linux/services/thread_helpers.h new file mode 100644 index 0000000000..f4abdffd03 --- /dev/null +++ b/sandbox/linux/services/thread_helpers.h @@ -0,0 +1,43 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SERVICES_THREAD_HELPERS_H_ +#define SANDBOX_LINUX_SERVICES_THREAD_HELPERS_H_ + +#include "base/macros.h" +#include "sandbox/sandbox_export.h" + +namespace base { class Thread; } + +namespace sandbox { + +class SANDBOX_EXPORT ThreadHelpers { + public: + // Check whether the current process is single threaded. |proc_fd| + // must be a file descriptor to /proc/ and remains owned by the + // caller. + static bool IsSingleThreaded(int proc_fd); + static bool IsSingleThreaded(); + + // Crash if the current process is not single threaded. This will wait + // on /proc to be updated. In the case where this doesn't crash, this will + // return promptly. In the case where this does crash, this will first wait + // for a few ms in Debug mode, a few seconds in Release mode. + static void AssertSingleThreaded(int proc_fd); + static void AssertSingleThreaded(); + + // Stop |thread| and ensure that it does not have an entry in + // /proc/self/task/ from the point of view of the current thread. This is + // the way to stop threads before calling IsSingleThreaded(). + static bool StopThreadAndWatchProcFS(int proc_fd, base::Thread* thread); + + static const char* GetAssertSingleThreadedErrorMessageForTests(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(ThreadHelpers); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SERVICES_THREAD_HELPERS_H_ diff --git a/sandbox/linux/services/thread_helpers_unittests.cc b/sandbox/linux/services/thread_helpers_unittests.cc new file mode 100644 index 0000000000..7357a0cfa7 --- /dev/null +++ b/sandbox/linux/services/thread_helpers_unittests.cc @@ -0,0 +1,147 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/thread_helpers.h" + +#include <errno.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "base/logging.h" +#include "base/memory/scoped_ptr.h" +#include "base/posix/eintr_wrapper.h" +#include "base/process/process_metrics.h" +#include "base/threading/platform_thread.h" +#include "base/threading/thread.h" +#include "build/build_config.h" +#include "sandbox/linux/tests/unit_tests.h" +#include "testing/gtest/include/gtest/gtest.h" + +using base::PlatformThread; + +namespace sandbox { + +namespace { + +// These tests fail under ThreadSanitizer, see http://crbug.com/342305 +#if !defined(THREAD_SANITIZER) + +int GetRaceTestIterations() { + if (IsRunningOnValgrind()) { + return 2; + } else { + return 1000; + } +} + +class ScopedProc { + public: + ScopedProc() : fd_(-1) { + fd_ = open("/proc/", O_RDONLY | O_DIRECTORY); + CHECK_LE(0, fd_); + } + + ~ScopedProc() { PCHECK(0 == IGNORE_EINTR(close(fd_))); } + + int fd() { return fd_; } + + private: + int fd_; + DISALLOW_COPY_AND_ASSIGN(ScopedProc); +}; + +TEST(ThreadHelpers, IsSingleThreadedBasic) { + ScopedProc proc_fd; + ASSERT_TRUE(ThreadHelpers::IsSingleThreaded(proc_fd.fd())); + ASSERT_TRUE(ThreadHelpers::IsSingleThreaded()); + + base::Thread thread("sandbox_tests"); + ASSERT_TRUE(thread.Start()); + ASSERT_FALSE(ThreadHelpers::IsSingleThreaded(proc_fd.fd())); + ASSERT_FALSE(ThreadHelpers::IsSingleThreaded()); + // Explicitly stop the thread here to not pollute the next test. + ASSERT_TRUE(ThreadHelpers::StopThreadAndWatchProcFS(proc_fd.fd(), &thread)); +} + +SANDBOX_TEST(ThreadHelpers, AssertSingleThreaded) { + ScopedProc proc_fd; + SANDBOX_ASSERT(ThreadHelpers::IsSingleThreaded(proc_fd.fd())); + SANDBOX_ASSERT(ThreadHelpers::IsSingleThreaded()); + + ThreadHelpers::AssertSingleThreaded(proc_fd.fd()); + ThreadHelpers::AssertSingleThreaded(); +} + +TEST(ThreadHelpers, IsSingleThreadedIterated) { + ScopedProc proc_fd; + ASSERT_TRUE(ThreadHelpers::IsSingleThreaded(proc_fd.fd())); + + // Iterate to check for race conditions. + for (int i = 0; i < GetRaceTestIterations(); ++i) { + base::Thread thread("sandbox_tests"); + ASSERT_TRUE(thread.Start()); + ASSERT_FALSE(ThreadHelpers::IsSingleThreaded(proc_fd.fd())); + // Explicitly stop the thread here to not pollute the next test. + ASSERT_TRUE(ThreadHelpers::StopThreadAndWatchProcFS(proc_fd.fd(), &thread)); + } +} + +TEST(ThreadHelpers, IsSingleThreadedStartAndStop) { + ScopedProc proc_fd; + ASSERT_TRUE(ThreadHelpers::IsSingleThreaded(proc_fd.fd())); + + base::Thread thread("sandbox_tests"); + // This is testing for a race condition, so iterate. + // Manually, this has been tested with more that 1M iterations. + for (int i = 0; i < GetRaceTestIterations(); ++i) { + ASSERT_TRUE(thread.Start()); + ASSERT_FALSE(ThreadHelpers::IsSingleThreaded(proc_fd.fd())); + + ASSERT_TRUE(ThreadHelpers::StopThreadAndWatchProcFS(proc_fd.fd(), &thread)); + ASSERT_TRUE(ThreadHelpers::IsSingleThreaded(proc_fd.fd())); + ASSERT_EQ(1, base::GetNumberOfThreads(base::GetCurrentProcessHandle())); + } +} + +SANDBOX_TEST(ThreadHelpers, AssertSingleThreadedAfterThreadStopped) { + SANDBOX_ASSERT(ThreadHelpers::IsSingleThreaded()); + + base::Thread thread1("sandbox_tests"); + base::Thread thread2("sandbox_tests"); + + for (int i = 0; i < GetRaceTestIterations(); ++i) { + SANDBOX_ASSERT(thread1.Start()); + SANDBOX_ASSERT(thread2.Start()); + SANDBOX_ASSERT(!ThreadHelpers::IsSingleThreaded()); + + thread1.Stop(); + thread2.Stop(); + // This will wait on /proc/ to reflect the state of threads in the + // process. + ThreadHelpers::AssertSingleThreaded(); + SANDBOX_ASSERT(ThreadHelpers::IsSingleThreaded()); + } +} + +// Only run this test in Debug mode, where AssertSingleThreaded() will return +// in less than 64ms. +#if !defined(NDEBUG) +SANDBOX_DEATH_TEST( + ThreadHelpers, + AssertSingleThreadedDies, + DEATH_MESSAGE( + ThreadHelpers::GetAssertSingleThreadedErrorMessageForTests())) { + base::Thread thread1("sandbox_tests"); + SANDBOX_ASSERT(thread1.Start()); + ThreadHelpers::AssertSingleThreaded(); +} +#endif // !defined(NDEBUG) + +#endif // !defined(THREAD_SANITIZER) + +} // namespace + +} // namespace sandbox diff --git a/sandbox/linux/services/yama.cc b/sandbox/linux/services/yama.cc new file mode 100644 index 0000000000..151f4bd340 --- /dev/null +++ b/sandbox/linux/services/yama.cc @@ -0,0 +1,115 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/services/yama.h" + +#include <fcntl.h> +#include <sys/prctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "base/files/file_util.h" +#include "base/files/scoped_file.h" +#include "base/logging.h" +#include "base/posix/eintr_wrapper.h" + +#if !defined(PR_SET_PTRACER_ANY) +#define PR_SET_PTRACER_ANY ((unsigned long)-1) +#endif + +#if !defined(PR_SET_PTRACER) +#define PR_SET_PTRACER 0x59616d61 +#endif + +namespace sandbox { + +namespace { + +// Enable or disable the Yama ptracers restrictions. +// Return false if Yama is not present on this kernel. +bool SetYamaPtracersRestriction(bool enable_restrictions) { + unsigned long set_ptracer_arg; + if (enable_restrictions) { + set_ptracer_arg = 0; + } else { + set_ptracer_arg = PR_SET_PTRACER_ANY; + } + + const int ret = prctl(PR_SET_PTRACER, set_ptracer_arg); + const int prctl_errno = errno; + + if (0 == ret) { + return true; + } else { + // ENOSYS or EINVAL means Yama is not in the current kernel. + CHECK(ENOSYS == prctl_errno || EINVAL == prctl_errno); + return false; + } +} + +bool CanAccessProcFS() { + static const char kProcfsKernelSysPath[] = "/proc/sys/kernel/"; + int ret = access(kProcfsKernelSysPath, F_OK); + if (ret) { + return false; + } + return true; +} + +} // namespace + +// static +bool Yama::RestrictPtracersToAncestors() { + return SetYamaPtracersRestriction(true /* enable_restrictions */); +} + +// static +bool Yama::DisableYamaRestrictions() { + return SetYamaPtracersRestriction(false /* enable_restrictions */); +} + +// static +int Yama::GetStatus() { + if (!CanAccessProcFS()) { + return 0; + } + + static const char kPtraceScopePath[] = "/proc/sys/kernel/yama/ptrace_scope"; + + base::ScopedFD yama_scope(HANDLE_EINTR(open(kPtraceScopePath, O_RDONLY))); + + if (!yama_scope.is_valid()) { + const int open_errno = errno; + DCHECK(ENOENT == open_errno); + // The status is known, yama is not present. + return STATUS_KNOWN; + } + + char yama_scope_value = 0; + ssize_t num_read = HANDLE_EINTR(read(yama_scope.get(), &yama_scope_value, 1)); + PCHECK(1 == num_read); + + switch (yama_scope_value) { + case '0': + return STATUS_KNOWN | STATUS_PRESENT; + case '1': + return STATUS_KNOWN | STATUS_PRESENT | STATUS_ENFORCING; + case '2': + case '3': + return STATUS_KNOWN | STATUS_PRESENT | STATUS_ENFORCING | + STATUS_STRICT_ENFORCING; + default: + NOTREACHED(); + return 0; + } +} + +// static +bool Yama::IsPresent() { return GetStatus() & STATUS_PRESENT; } + +// static +bool Yama::IsEnforcing() { return GetStatus() & STATUS_ENFORCING; } + +} // namespace sandbox diff --git a/sandbox/linux/services/yama.h b/sandbox/linux/services/yama.h new file mode 100644 index 0000000000..e6c5c45b2a --- /dev/null +++ b/sandbox/linux/services/yama.h @@ -0,0 +1,57 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SERVICES_YAMA_H_ +#define SANDBOX_LINUX_SERVICES_YAMA_H_ + +#include "base/macros.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { + +// Yama is a LSM kernel module which can restrict ptrace(). +// This class provides ways to detect if Yama is present and enabled +// and to restrict which processes can ptrace the current process. +class SANDBOX_EXPORT Yama { + public: + // This enum should be used to set or check a bitmask. + // A value of 0 would indicate that the status is not known. + enum GlobalStatus { + STATUS_KNOWN = 1 << 0, + STATUS_PRESENT = 1 << 1, + STATUS_ENFORCING = 1 << 2, + // STATUS_STRICT_ENFORCING corresponds to either mode 2 or mode 3 of Yama. + // Ptrace could be entirely denied, or restricted to CAP_SYS_PTRACE + // and PTRACE_TRACEME. + STATUS_STRICT_ENFORCING = 1 << 3 + }; + + // Restrict who can ptrace() the current process to its ancestors. + // If this succeeds, then Yama is available on this kernel. + // However, Yama may not be enforcing at this time. + static bool RestrictPtracersToAncestors(); + + // Disable Yama restrictions for the current process. + // This will fail if Yama is not available on this kernel. + // This is meant for testing only. If you need this, implement + // a per-pid authorization instead. + static bool DisableYamaRestrictions(); + + // Checks if Yama is currently in enforcing mode for the machine (not the + // current process). This requires access to the filesystem and will use + // /proc/sys/kernel/yama/ptrace_scope. + static int GetStatus(); + + // Helper for checking for STATUS_PRESENT in GetStatus(). + static bool IsPresent(); + // Helper for checkking for STATUS_ENFORCING in GetStatus(). + static bool IsEnforcing(); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Yama); +}; + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SERVICES_YAMA_H_ diff --git a/sandbox/linux/services/yama_unittests.cc b/sandbox/linux/services/yama_unittests.cc new file mode 100644 index 0000000000..204cfd6a44 --- /dev/null +++ b/sandbox/linux/services/yama_unittests.cc @@ -0,0 +1,172 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <errno.h> +#include <fcntl.h> +#include <sys/ptrace.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "base/bind.h" +#include "base/compiler_specific.h" +#include "base/posix/eintr_wrapper.h" +#include "base/strings/string_util.h" +#include "base/sys_info.h" +#include "sandbox/linux/services/scoped_process.h" +#include "sandbox/linux/services/yama.h" +#include "sandbox/linux/tests/unit_tests.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace sandbox { + +namespace { + +bool HasLinux32Bug() { +#if defined(__i386__) + // On 3.2 kernels, yama doesn't work for 32-bit binaries on 64-bit kernels. + // This is fixed in 3.4. + bool is_kernel_64bit = + base::SysInfo::OperatingSystemArchitecture() == "x86_64"; + bool is_linux = base::SysInfo::OperatingSystemName() == "Linux"; + bool is_3_dot_2 = base::StartsWithASCII( + base::SysInfo::OperatingSystemVersion(), "3.2", /*case_sensitive=*/false); + if (is_kernel_64bit && is_linux && is_3_dot_2) + return true; +#endif // defined(__i386__) + return false; +} + +bool CanPtrace(pid_t pid) { + int ret; + ret = ptrace(PTRACE_ATTACH, pid, NULL, NULL); + if (ret == -1) { + CHECK_EQ(EPERM, errno); + return false; + } + // Wait for the process to be stopped so that it can be detached. + siginfo_t process_info; + int wait_ret = HANDLE_EINTR(waitid(P_PID, pid, &process_info, WSTOPPED)); + PCHECK(0 == wait_ret); + PCHECK(0 == ptrace(PTRACE_DETACH, pid, NULL, NULL)); + return true; +} + +// _exit(0) if pid can be ptraced by the current process. +// _exit(1) otherwise. +void ExitZeroIfCanPtrace(pid_t pid) { + if (CanPtrace(pid)) { + _exit(0); + } else { + _exit(1); + } +} + +bool CanSubProcessPtrace(pid_t pid) { + ScopedProcess process(base::Bind(&ExitZeroIfCanPtrace, pid)); + bool signaled; + int exit_code = process.WaitForExit(&signaled); + CHECK(!signaled); + return 0 == exit_code; +} + +// The tests below assume that the system-level configuration will not change +// while they run. + +TEST(Yama, GetStatus) { + int status1 = Yama::GetStatus(); + + // Check that the value is a possible bitmask. + ASSERT_LE(0, status1); + ASSERT_GE(Yama::STATUS_KNOWN | Yama::STATUS_PRESENT | Yama::STATUS_ENFORCING | + Yama::STATUS_STRICT_ENFORCING, + status1); + + // The status should not just be a random value. + int status2 = Yama::GetStatus(); + EXPECT_EQ(status1, status2); + + // This test is not running sandboxed, there is no reason to not know the + // status. + EXPECT_NE(0, Yama::STATUS_KNOWN & status1); + + if (status1 & Yama::STATUS_STRICT_ENFORCING) { + // If Yama is strictly enforcing, it is also enforcing. + EXPECT_TRUE(status1 & Yama::STATUS_ENFORCING); + } + + if (status1 & Yama::STATUS_ENFORCING) { + // If Yama is enforcing, Yama is present. + EXPECT_NE(0, status1 & Yama::STATUS_PRESENT); + } + + // Verify that the helper functions work as intended. + EXPECT_EQ(static_cast<bool>(status1 & Yama::STATUS_ENFORCING), + Yama::IsEnforcing()); + EXPECT_EQ(static_cast<bool>(status1 & Yama::STATUS_PRESENT), + Yama::IsPresent()); + + fprintf(stdout, + "Yama present: %s - enforcing: %s\n", + Yama::IsPresent() ? "Y" : "N", + Yama::IsEnforcing() ? "Y" : "N"); +} + +SANDBOX_TEST(Yama, RestrictPtraceSucceedsWhenYamaPresent) { + // This call will succeed iff Yama is present. + bool restricted = Yama::RestrictPtracersToAncestors(); + CHECK_EQ(restricted, Yama::IsPresent()); +} + +// Attempts to enable or disable Yama restrictions. +void SetYamaRestrictions(bool enable_restriction) { + if (enable_restriction) { + Yama::RestrictPtracersToAncestors(); + } else { + Yama::DisableYamaRestrictions(); + } +} + +TEST(Yama, RestrictPtraceWorks) { + if (HasLinux32Bug()) + return; + + ScopedProcess process1(base::Bind(&SetYamaRestrictions, true)); + ASSERT_TRUE(process1.WaitForClosureToRun()); + + if (Yama::IsEnforcing()) { + // A sibling process cannot ptrace process1. + ASSERT_FALSE(CanSubProcessPtrace(process1.GetPid())); + } + + if (!(Yama::GetStatus() & Yama::STATUS_STRICT_ENFORCING)) { + // However, parent can ptrace process1. + ASSERT_TRUE(CanPtrace(process1.GetPid())); + + // A sibling can ptrace process2 which disables any Yama protection. + ScopedProcess process2(base::Bind(&SetYamaRestrictions, false)); + ASSERT_TRUE(process2.WaitForClosureToRun()); + ASSERT_TRUE(CanSubProcessPtrace(process2.GetPid())); + } +} + +void DoNothing() {} + +SANDBOX_TEST(Yama, RestrictPtraceIsDefault) { + if (!Yama::IsPresent() || HasLinux32Bug()) + return; + + CHECK(Yama::DisableYamaRestrictions()); + ScopedProcess process1(base::Bind(&DoNothing)); + + if (Yama::IsEnforcing()) { + // Check that process1 is protected by Yama, even though it has + // been created from a process that disabled Yama. + CHECK(!CanSubProcessPtrace(process1.GetPid())); + } +} + +} // namespace + +} // namespace sandbox diff --git a/sandbox/linux/syscall_broker/broker_channel.cc b/sandbox/linux/syscall_broker/broker_channel.cc new file mode 100644 index 0000000000..fa0f7615fc --- /dev/null +++ b/sandbox/linux/syscall_broker/broker_channel.cc @@ -0,0 +1,35 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/syscall_broker/broker_channel.h" + +#include <sys/socket.h> +#include <sys/types.h> + +#include "base/logging.h" + +namespace sandbox { + +namespace syscall_broker { + +// static +void BrokerChannel::CreatePair(EndPoint* reader, EndPoint* writer) { + DCHECK(reader); + DCHECK(writer); + int socket_pair[2]; + // Use SOCK_SEQPACKET, to preserve message boundaries but we also want to be + // notified (recvmsg should return and not block) when the connection has + // been broken which could mean that the other end has been closed. + PCHECK(0 == socketpair(AF_UNIX, SOCK_SEQPACKET, 0, socket_pair)); + + reader->reset(socket_pair[0]); + PCHECK(0 == shutdown(reader->get(), SHUT_WR)); + + writer->reset(socket_pair[1]); + PCHECK(0 == shutdown(writer->get(), SHUT_RD)); +} + +} // namespace syscall_broker + +} // namespace sandbox diff --git a/sandbox/linux/syscall_broker/broker_channel.h b/sandbox/linux/syscall_broker/broker_channel.h new file mode 100644 index 0000000000..2abdba413a --- /dev/null +++ b/sandbox/linux/syscall_broker/broker_channel.h @@ -0,0 +1,31 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SYSCALL_BROKER_BROKER_CHANNEL_H_ +#define SANDBOX_LINUX_SYSCALL_BROKER_BROKER_CHANNEL_H_ + +#include "base/files/scoped_file.h" +#include "base/macros.h" + +namespace sandbox { + +namespace syscall_broker { + +// A small class to create a pipe-like communication channel. It is based on a +// SOCK_SEQPACKET unix socket, which is connection-based and guaranteed to +// preserve message boundaries. +class BrokerChannel { + public: + typedef base::ScopedFD EndPoint; + static void CreatePair(EndPoint* reader, EndPoint* writer); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(BrokerChannel); +}; + +} // namespace syscall_broker + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SYSCALL_BROKER_BROKER_CHANNEL_H_ diff --git a/sandbox/linux/syscall_broker/broker_client.cc b/sandbox/linux/syscall_broker/broker_client.cc new file mode 100644 index 0000000000..760cf59b3c --- /dev/null +++ b/sandbox/linux/syscall_broker/broker_client.cc @@ -0,0 +1,144 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/syscall_broker/broker_client.h" + +#include <errno.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "build/build_config.h" +#include "base/logging.h" +#include "base/pickle.h" +#include "base/posix/unix_domain_socket_linux.h" +#include "sandbox/linux/syscall_broker/broker_channel.h" +#include "sandbox/linux/syscall_broker/broker_common.h" +#include "sandbox/linux/syscall_broker/broker_policy.h" + +#if defined(OS_ANDROID) && !defined(MSG_CMSG_CLOEXEC) +#define MSG_CMSG_CLOEXEC 0x40000000 +#endif + +namespace sandbox { + +namespace syscall_broker { + +// Make a remote system call over IPC for syscalls that take a path and flags +// as arguments, currently open() and access(). +// Will return -errno like a real system call. +// This function needs to be async signal safe. +int BrokerClient::PathAndFlagsSyscall(IPCCommand syscall_type, + const char* pathname, + int flags) const { + int recvmsg_flags = 0; + RAW_CHECK(syscall_type == COMMAND_OPEN || syscall_type == COMMAND_ACCESS); + if (!pathname) + return -EFAULT; + + // For this "remote system call" to work, we need to handle any flag that + // cannot be sent over a Unix socket in a special way. + // See the comments around kCurrentProcessOpenFlagsMask. + if (syscall_type == COMMAND_OPEN && (flags & kCurrentProcessOpenFlagsMask)) { + // This implementation only knows about O_CLOEXEC, someone needs to look at + // this code if other flags are added. + RAW_CHECK(kCurrentProcessOpenFlagsMask == O_CLOEXEC); + recvmsg_flags |= MSG_CMSG_CLOEXEC; + flags &= ~O_CLOEXEC; + } + + // There is no point in forwarding a request that we know will be denied. + // Of course, the real security check needs to be on the other side of the + // IPC. + if (fast_check_in_client_) { + if (syscall_type == COMMAND_OPEN && + !broker_policy_.GetFileNameIfAllowedToOpen( + pathname, flags, NULL /* file_to_open */, + NULL /* unlink_after_open */)) { + return -broker_policy_.denied_errno(); + } + if (syscall_type == COMMAND_ACCESS && + !broker_policy_.GetFileNameIfAllowedToAccess(pathname, flags, NULL)) { + return -broker_policy_.denied_errno(); + } + } + + base::Pickle write_pickle; + write_pickle.WriteInt(syscall_type); + write_pickle.WriteString(pathname); + write_pickle.WriteInt(flags); + RAW_CHECK(write_pickle.size() <= kMaxMessageLength); + + int returned_fd = -1; + uint8_t reply_buf[kMaxMessageLength]; + + // Send a request (in write_pickle) as well that will include a new + // temporary socketpair (created internally by SendRecvMsg()). + // Then read the reply on this new socketpair in reply_buf and put an + // eventual attached file descriptor in |returned_fd|. + ssize_t msg_len = base::UnixDomainSocket::SendRecvMsgWithFlags( + ipc_channel_.get(), reply_buf, sizeof(reply_buf), recvmsg_flags, + &returned_fd, write_pickle); + if (msg_len <= 0) { + if (!quiet_failures_for_tests_) + RAW_LOG(ERROR, "Could not make request to broker process"); + return -ENOMEM; + } + + base::Pickle read_pickle(reinterpret_cast<char*>(reply_buf), msg_len); + base::PickleIterator iter(read_pickle); + int return_value = -1; + // Now deserialize the return value and eventually return the file + // descriptor. + if (iter.ReadInt(&return_value)) { + switch (syscall_type) { + case COMMAND_ACCESS: + // We should never have a fd to return. + RAW_CHECK(returned_fd == -1); + return return_value; + case COMMAND_OPEN: + if (return_value < 0) { + RAW_CHECK(returned_fd == -1); + return return_value; + } else { + // We have a real file descriptor to return. + RAW_CHECK(returned_fd >= 0); + return returned_fd; + } + default: + RAW_LOG(ERROR, "Unsupported command"); + return -ENOSYS; + } + } else { + RAW_LOG(ERROR, "Could not read pickle"); + NOTREACHED(); + return -ENOMEM; + } +} + +BrokerClient::BrokerClient(const BrokerPolicy& broker_policy, + BrokerChannel::EndPoint ipc_channel, + bool fast_check_in_client, + bool quiet_failures_for_tests) + : broker_policy_(broker_policy), + ipc_channel_(ipc_channel.Pass()), + fast_check_in_client_(fast_check_in_client), + quiet_failures_for_tests_(quiet_failures_for_tests) { +} + +BrokerClient::~BrokerClient() { +} + +int BrokerClient::Access(const char* pathname, int mode) const { + return PathAndFlagsSyscall(COMMAND_ACCESS, pathname, mode); +} + +int BrokerClient::Open(const char* pathname, int flags) const { + return PathAndFlagsSyscall(COMMAND_OPEN, pathname, flags); +} + +} // namespace syscall_broker + +} // namespace sandbox diff --git a/sandbox/linux/syscall_broker/broker_client.h b/sandbox/linux/syscall_broker/broker_client.h new file mode 100644 index 0000000000..2dfef8150c --- /dev/null +++ b/sandbox/linux/syscall_broker/broker_client.h @@ -0,0 +1,75 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SYSCALL_BROKER_BROKER_CLIENT_H_ +#define SANDBOX_LINUX_SYSCALL_BROKER_BROKER_CLIENT_H_ + +#include "base/macros.h" +#include "sandbox/linux/syscall_broker/broker_channel.h" +#include "sandbox/linux/syscall_broker/broker_common.h" + +namespace sandbox { + +namespace syscall_broker { + +class BrokerPolicy; + +// This class can be embedded in a sandboxed process and can be +// used to perform certain system calls in another, presumably +// non-sandboxed process (which embeds BrokerHost). +// A key feature of this class is the ability to use some of its methods in a +// thread-safe and async-signal safe way. The goal is to be able to use it to +// replace the open() or access() system calls happening anywhere in a process +// (as allowed for instance by seccomp-bpf's SIGSYS mechanism). +class BrokerClient { + public: + // |policy| needs to match the policy used by BrokerHost. This + // allows to predict some of the requests which will be denied + // and save an IPC round trip. + // |ipc_channel| needs to be a suitable SOCK_SEQPACKET unix socket. + // |fast_check_in_client| should be set to true and + // |quiet_failures_for_tests| to false unless you are writing tests. + BrokerClient(const BrokerPolicy& policy, + BrokerChannel::EndPoint ipc_channel, + bool fast_check_in_client, + bool quiet_failures_for_tests); + ~BrokerClient(); + + // Can be used in place of access(). + // X_OK will always return an error in practice since the broker process + // doesn't support execute permissions. + // It's similar to the access() system call and will return -errno on errors. + // This is async signal safe. + int Access(const char* pathname, int mode) const; + // Can be used in place of open(). + // The implementation only supports certain white listed flags and will + // return -EPERM on other flags. + // It's similar to the open() system call and will return -errno on errors. + // This is async signal safe. + int Open(const char* pathname, int flags) const; + + // Get the file descriptor used for IPC. This is used for tests. + int GetIPCDescriptor() const { return ipc_channel_.get(); } + + private: + const BrokerPolicy& broker_policy_; + const BrokerChannel::EndPoint ipc_channel_; + const bool fast_check_in_client_; // Whether to forward a request that we + // know will be denied to the broker. (Used + // for tests). + const bool quiet_failures_for_tests_; // Disable certain error message when + // testing for failures. + + int PathAndFlagsSyscall(IPCCommand syscall_type, + const char* pathname, + int flags) const; + + DISALLOW_COPY_AND_ASSIGN(BrokerClient); +}; + +} // namespace syscall_broker + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SYSCALL_BROKER_BROKER_CLIENT_H_ diff --git a/sandbox/linux/syscall_broker/broker_file_permission.cc b/sandbox/linux/syscall_broker/broker_file_permission.cc new file mode 100644 index 0000000000..beceda93f5 --- /dev/null +++ b/sandbox/linux/syscall_broker/broker_file_permission.cc @@ -0,0 +1,243 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/syscall_broker/broker_file_permission.h" + +#include <fcntl.h> +#include <string.h> + +#include <string> + +#include "base/logging.h" +#include "sandbox/linux/syscall_broker/broker_common.h" + +namespace sandbox { + +namespace syscall_broker { + +// Async signal safe +bool BrokerFilePermission::ValidatePath(const char* path) { + if (!path) + return false; + + const size_t len = strlen(path); + // No empty paths + if (len == 0) + return false; + // Paths must be absolute and not relative + if (path[0] != '/') + return false; + // No trailing / (but "/" is valid) + if (len > 1 && path[len - 1] == '/') + return false; + // No trailing /.. + if (len >= 3 && path[len - 3] == '/' && path[len - 2] == '.' && + path[len - 1] == '.') + return false; + // No /../ anywhere + for (size_t i = 0; i < len; i++) { + if (path[i] == '/' && (len - i) > 3) { + if (path[i + 1] == '.' && path[i + 2] == '.' && path[i + 3] == '/') { + return false; + } + } + } + return true; +} + +// Async signal safe +// Calls std::string::c_str(), strncmp and strlen. All these +// methods are async signal safe in common standard libs. +// TODO(leecam): remove dependency on std::string +bool BrokerFilePermission::MatchPath(const char* requested_filename) const { + const char* path = path_.c_str(); + if ((recursive_ && strncmp(requested_filename, path, strlen(path)) == 0)) { + // Note: This prefix match will allow any path under the whitelisted + // path, for any number of directory levels. E.g. if the whitelisted + // path is /good/ then the following will be permitted by the policy. + // /good/file1 + // /good/folder/file2 + // /good/folder/folder2/file3 + // If an attacker could make 'folder' a symlink to ../../ they would have + // access to the entire filesystem. + // Whitelisting with multiple depths is useful, e.g /proc/ but + // the system needs to ensure symlinks can not be created! + // That said if an attacker can convert any of the absolute paths + // to a symlink they can control any file on the system also. + return true; + } else if (strcmp(requested_filename, path) == 0) { + return true; + } + return false; +} + +// Async signal safe. +// External call to std::string::c_str() is +// called in MatchPath. +// TODO(leecam): remove dependency on std::string +bool BrokerFilePermission::CheckAccess(const char* requested_filename, + int mode, + const char** file_to_access) const { + // First, check if |mode| is existence, ability to read or ability + // to write. We do not support X_OK. + if (mode != F_OK && mode & ~(R_OK | W_OK)) { + return false; + } + + if (!ValidatePath(requested_filename)) + return false; + + if (!MatchPath(requested_filename)) { + return false; + } + bool allowed = false; + switch (mode) { + case F_OK: + if (allow_read_ || allow_write_) + allowed = true; + break; + case R_OK: + if (allow_read_) + allowed = true; + break; + case W_OK: + if (allow_write_) + allowed = true; + break; + case R_OK | W_OK: + if (allow_read_ && allow_write_) + allowed = true; + break; + default: + return false; + } + + if (allowed && file_to_access) { + if (!recursive_) + *file_to_access = path_.c_str(); + else + *file_to_access = requested_filename; + } + return allowed; +} + +// Async signal safe. +// External call to std::string::c_str() is +// called in MatchPath. +// TODO(leecam): remove dependency on std::string +bool BrokerFilePermission::CheckOpen(const char* requested_filename, + int flags, + const char** file_to_open, + bool* unlink_after_open) const { + if (!ValidatePath(requested_filename)) + return false; + + if (!MatchPath(requested_filename)) { + return false; + } + + // First, check the access mode is valid. + const int access_mode = flags & O_ACCMODE; + if (access_mode != O_RDONLY && access_mode != O_WRONLY && + access_mode != O_RDWR) { + return false; + } + + // Check if read is allowed + if (!allow_read_ && (access_mode == O_RDONLY || access_mode == O_RDWR)) { + return false; + } + + // Check if write is allowed + if (!allow_write_ && (access_mode == O_WRONLY || access_mode == O_RDWR)) { + return false; + } + + // Check if file creation is allowed. + if (!allow_create_ && (flags & O_CREAT)) { + return false; + } + + // If O_CREAT is present, ensure O_EXCL + if ((flags & O_CREAT) && !(flags & O_EXCL)) { + return false; + } + + // If this file is to be unlinked, ensure it's created. + if (unlink_ && !(flags & O_CREAT)) { + return false; + } + + // Some flags affect the behavior of the current process. We don't support + // them and don't allow them for now. + if (flags & kCurrentProcessOpenFlagsMask) { + return false; + } + + // Now check that all the flags are known to us. + const int creation_and_status_flags = flags & ~O_ACCMODE; + + const int known_flags = O_APPEND | O_ASYNC | O_CLOEXEC | O_CREAT | O_DIRECT | + O_DIRECTORY | O_EXCL | O_LARGEFILE | O_NOATIME | + O_NOCTTY | O_NOFOLLOW | O_NONBLOCK | O_NDELAY | + O_SYNC | O_TRUNC; + + const int unknown_flags = ~known_flags; + const bool has_unknown_flags = creation_and_status_flags & unknown_flags; + + if (has_unknown_flags) + return false; + + if (file_to_open) { + if (!recursive_) + *file_to_open = path_.c_str(); + else + *file_to_open = requested_filename; + } + if (unlink_after_open) + *unlink_after_open = unlink_; + + return true; +} + +const char* BrokerFilePermission::GetErrorMessageForTests() { + static char kInvalidBrokerFileString[] = "Invalid BrokerFilePermission"; + return kInvalidBrokerFileString; +} + +BrokerFilePermission::BrokerFilePermission(const std::string& path, + bool recursive, + bool unlink, + bool allow_read, + bool allow_write, + bool allow_create) + : path_(path), + recursive_(recursive), + unlink_(unlink), + allow_read_(allow_read), + allow_write_(allow_write), + allow_create_(allow_create) { + // Validate this permission and die if invalid! + + // Must have enough length for a '/' + CHECK(path_.length() > 0) << GetErrorMessageForTests(); + // Whitelisted paths must be absolute. + CHECK(path_[0] == '/') << GetErrorMessageForTests(); + + // Don't allow unlinking on creation without create permission + if (unlink_) { + CHECK(allow_create) << GetErrorMessageForTests(); + } + const char last_char = *(path_.rbegin()); + // Recursive paths must have a trailing slash + if (recursive_) { + CHECK(last_char == '/') << GetErrorMessageForTests(); + } else { + CHECK(last_char != '/') << GetErrorMessageForTests(); + } +} + +} // namespace syscall_broker + +} // namespace sandbox
\ No newline at end of file diff --git a/sandbox/linux/syscall_broker/broker_file_permission.h b/sandbox/linux/syscall_broker/broker_file_permission.h new file mode 100644 index 0000000000..03300d1d74 --- /dev/null +++ b/sandbox/linux/syscall_broker/broker_file_permission.h @@ -0,0 +1,119 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SYSCALL_BROKER_BROKER_FILE_PERMISSION_H_ +#define SANDBOX_LINUX_SYSCALL_BROKER_BROKER_FILE_PERMISSION_H_ + +#include <string> + +#include "base/macros.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { + +namespace syscall_broker { + +// BrokerFilePermission defines a path for whitelisting. +// Pick the correct static factory method to create a permission. +// CheckOpen and CheckAccess are async signal safe. +// Constuction and Destruction are not async signal safe. +// |path| is the path to be whitelisted. +class SANDBOX_EXPORT BrokerFilePermission { + public: + ~BrokerFilePermission() {} + BrokerFilePermission(const BrokerFilePermission&) = default; + BrokerFilePermission& operator=(const BrokerFilePermission&) = default; + + static BrokerFilePermission ReadOnly(const std::string& path) { + return BrokerFilePermission(path, false, false, true, false, false); + } + + static BrokerFilePermission ReadOnlyRecursive(const std::string& path) { + return BrokerFilePermission(path, true, false, true, false, false); + } + + static BrokerFilePermission WriteOnly(const std::string& path) { + return BrokerFilePermission(path, false, false, false, true, false); + } + + static BrokerFilePermission ReadWrite(const std::string& path) { + return BrokerFilePermission(path, false, false, true, true, false); + } + + static BrokerFilePermission ReadWriteCreate(const std::string& path) { + return BrokerFilePermission(path, false, false, true, true, true); + } + + static BrokerFilePermission ReadWriteCreateUnlink(const std::string& path) { + return BrokerFilePermission(path, false, true, true, true, true); + } + + static BrokerFilePermission ReadWriteCreateUnlinkRecursive( + const std::string& path) { + return BrokerFilePermission(path, true, true, true, true, true); + } + + // Returns true if |requested_filename| is allowed to be opened + // by this permission. + // If |file_to_open| is not NULL it is set to point to either + // the |requested_filename| in the case of a recursive match, + // or a pointer the matched path in the whitelist if an absolute + // match. + // If not NULL |unlink_after_open| is set to point to true if the + // caller should unlink the path after openning. + // Async signal safe if |file_to_open| is NULL. + bool CheckOpen(const char* requested_filename, + int flags, + const char** file_to_open, + bool* unlink_after_open) const; + // Returns true if |requested_filename| is allowed to be accessed + // by this permission as per access(2). + // If |file_to_open| is not NULL it is set to point to either + // the |requested_filename| in the case of a recursive match, + // or a pointer to the matched path in the whitelist if an absolute + // match. + // |mode| is per mode argument of access(2). + // Async signal safe if |file_to_access| is NULL + bool CheckAccess(const char* requested_filename, + int mode, + const char** file_to_access) const; + + private: + friend class BrokerFilePermissionTester; + BrokerFilePermission(const std::string& path, + bool recursive, + bool unlink, + bool allow_read, + bool allow_write, + bool allow_create); + + // ValidatePath checks |path| and returns true if these conditions are met + // * Greater than 0 length + // * Is an absolute path + // * No trailing slash + // * No /../ path traversal + static bool ValidatePath(const char* path); + + // MatchPath returns true if |requested_filename| is covered by this instance + bool MatchPath(const char* requested_filename) const; + + // Used in by BrokerFilePermissionTester for tests. + static const char* GetErrorMessageForTests(); + + // These are not const as std::vector requires copy-assignment and this class + // is stored in vectors. All methods are marked const so + // the compiler will still enforce no changes outside of the constructor. + std::string path_; + bool recursive_; // Allow everything under this path. |path| must be a dir. + bool unlink_; // unlink after opening. + bool allow_read_; + bool allow_write_; + bool allow_create_; +}; + +} // namespace syscall_broker + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SYSCALL_BROKER_BROKER_FILE_PERMISSION_H_
\ No newline at end of file diff --git a/sandbox/linux/syscall_broker/broker_file_permission_unittest.cc b/sandbox/linux/syscall_broker/broker_file_permission_unittest.cc new file mode 100644 index 0000000000..b58a901cde --- /dev/null +++ b/sandbox/linux/syscall_broker/broker_file_permission_unittest.cc @@ -0,0 +1,262 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/syscall_broker/broker_file_permission.h" + +#include <fcntl.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "base/logging.h" +#include "base/macros.h" +#include "sandbox/linux/tests/test_utils.h" +#include "sandbox/linux/tests/unit_tests.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace sandbox { + +namespace syscall_broker { + +class BrokerFilePermissionTester { + public: + static bool ValidatePath(const char* path) { + return BrokerFilePermission::ValidatePath(path); + } + static const char* GetErrorMessage() { + return BrokerFilePermission::GetErrorMessageForTests(); + } + + private: + DISALLOW_COPY_AND_ASSIGN(BrokerFilePermissionTester); +}; + +namespace { + +// Creation tests are DEATH tests as a bad permission causes termination. +SANDBOX_TEST(BrokerFilePermission, CreateGood) { + const char kPath[] = "/tmp/good"; + BrokerFilePermission perm = BrokerFilePermission::ReadOnly(kPath); +} + +SANDBOX_TEST(BrokerFilePermission, CreateGoodRecursive) { + const char kPath[] = "/tmp/good/"; + BrokerFilePermission perm = BrokerFilePermission::ReadOnlyRecursive(kPath); +} + +SANDBOX_DEATH_TEST( + BrokerFilePermission, + CreateBad, + DEATH_MESSAGE(BrokerFilePermissionTester::GetErrorMessage())) { + const char kPath[] = "/tmp/bad/"; + BrokerFilePermission perm = BrokerFilePermission::ReadOnly(kPath); +} + +SANDBOX_DEATH_TEST( + BrokerFilePermission, + CreateBadRecursive, + DEATH_MESSAGE(BrokerFilePermissionTester::GetErrorMessage())) { + const char kPath[] = "/tmp/bad"; + BrokerFilePermission perm = BrokerFilePermission::ReadOnlyRecursive(kPath); +} + +SANDBOX_DEATH_TEST( + BrokerFilePermission, + CreateBadNotAbs, + DEATH_MESSAGE(BrokerFilePermissionTester::GetErrorMessage())) { + const char kPath[] = "tmp/bad"; + BrokerFilePermission perm = BrokerFilePermission::ReadOnly(kPath); +} + +SANDBOX_DEATH_TEST( + BrokerFilePermission, + CreateBadEmpty, + DEATH_MESSAGE(BrokerFilePermissionTester::GetErrorMessage())) { + const char kPath[] = ""; + BrokerFilePermission perm = BrokerFilePermission::ReadOnly(kPath); +} + +// CheckPerm tests |path| against |perm| given |access_flags|. +// If |create| is true then file creation is tested for success. +void CheckPerm(const BrokerFilePermission& perm, + const char* path, + int access_flags, + bool create) { + const char* file_to_open = NULL; + + ASSERT_FALSE(perm.CheckAccess(path, X_OK, NULL)); + ASSERT_TRUE(perm.CheckAccess(path, F_OK, NULL)); + // check bad perms + switch (access_flags) { + case O_RDONLY: + ASSERT_TRUE(perm.CheckOpen(path, O_RDONLY, &file_to_open, NULL)); + ASSERT_FALSE(perm.CheckOpen(path, O_WRONLY, &file_to_open, NULL)); + ASSERT_FALSE(perm.CheckOpen(path, O_RDWR, &file_to_open, NULL)); + ASSERT_TRUE(perm.CheckAccess(path, R_OK, NULL)); + ASSERT_FALSE(perm.CheckAccess(path, W_OK, NULL)); + break; + case O_WRONLY: + ASSERT_FALSE(perm.CheckOpen(path, O_RDONLY, &file_to_open, NULL)); + ASSERT_TRUE(perm.CheckOpen(path, O_WRONLY, &file_to_open, NULL)); + ASSERT_FALSE(perm.CheckOpen(path, O_RDWR, &file_to_open, NULL)); + ASSERT_FALSE(perm.CheckAccess(path, R_OK, NULL)); + ASSERT_TRUE(perm.CheckAccess(path, W_OK, NULL)); + break; + case O_RDWR: + ASSERT_TRUE(perm.CheckOpen(path, O_RDONLY, &file_to_open, NULL)); + ASSERT_TRUE(perm.CheckOpen(path, O_WRONLY, &file_to_open, NULL)); + ASSERT_TRUE(perm.CheckOpen(path, O_RDWR, &file_to_open, NULL)); + ASSERT_TRUE(perm.CheckAccess(path, R_OK, NULL)); + ASSERT_TRUE(perm.CheckAccess(path, W_OK, NULL)); + break; + default: + // Bad test case + NOTREACHED(); + } + +// O_SYNC can be defined as (__O_SYNC|O_DSYNC) +#ifdef O_DSYNC + const int kSyncFlag = O_SYNC & ~O_DSYNC; +#else + const int kSyncFlag = O_SYNC; +#endif + + const int kNumberOfBitsInOAccMode = 2; + static_assert(O_ACCMODE == ((1 << kNumberOfBitsInOAccMode) - 1), + "incorrect number of bits"); + // check every possible flag and act accordingly. + // Skipping AccMode bits as they are present in every case. + for (int i = kNumberOfBitsInOAccMode; i < 32; i++) { + int flag = 1 << i; + switch (flag) { + case O_APPEND: + case O_ASYNC: + case O_DIRECT: + case O_DIRECTORY: +#ifdef O_DSYNC + case O_DSYNC: +#endif + case O_EXCL: + case O_LARGEFILE: + case O_NOATIME: + case O_NOCTTY: + case O_NOFOLLOW: + case O_NONBLOCK: +#if (O_NONBLOCK != O_NDELAY) + case O_NDELAY: +#endif + case kSyncFlag: + case O_TRUNC: + ASSERT_TRUE( + perm.CheckOpen(path, access_flags | flag, &file_to_open, NULL)); + break; + case O_CLOEXEC: + case O_CREAT: + default: + ASSERT_FALSE( + perm.CheckOpen(path, access_flags | flag, &file_to_open, NULL)); + } + } + if (create) { + bool unlink; + ASSERT_TRUE(perm.CheckOpen(path, O_CREAT | O_EXCL | access_flags, + &file_to_open, &unlink)); + ASSERT_FALSE(unlink); + } else { + ASSERT_FALSE(perm.CheckOpen(path, O_CREAT | O_EXCL | access_flags, + &file_to_open, NULL)); + } +} + +TEST(BrokerFilePermission, ReadOnly) { + const char kPath[] = "/tmp/good"; + BrokerFilePermission perm = BrokerFilePermission::ReadOnly(kPath); + CheckPerm(perm, kPath, O_RDONLY, false); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +TEST(BrokerFilePermission, ReadOnlyRecursive) { + const char kPath[] = "/tmp/good/"; + const char kPathFile[] = "/tmp/good/file"; + BrokerFilePermission perm = BrokerFilePermission::ReadOnlyRecursive(kPath); + CheckPerm(perm, kPathFile, O_RDONLY, false); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +TEST(BrokerFilePermission, WriteOnly) { + const char kPath[] = "/tmp/good"; + BrokerFilePermission perm = BrokerFilePermission::WriteOnly(kPath); + CheckPerm(perm, kPath, O_WRONLY, false); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +TEST(BrokerFilePermission, ReadWrite) { + const char kPath[] = "/tmp/good"; + BrokerFilePermission perm = BrokerFilePermission::ReadWrite(kPath); + CheckPerm(perm, kPath, O_RDWR, false); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +TEST(BrokerFilePermission, ReadWriteCreate) { + const char kPath[] = "/tmp/good"; + BrokerFilePermission perm = BrokerFilePermission::ReadWriteCreate(kPath); + CheckPerm(perm, kPath, O_RDWR, true); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +void CheckUnlink(BrokerFilePermission& perm, + const char* path, + int access_flags) { + bool unlink; + ASSERT_FALSE(perm.CheckOpen(path, access_flags, NULL, &unlink)); + ASSERT_FALSE(perm.CheckOpen(path, access_flags | O_CREAT, NULL, &unlink)); + ASSERT_TRUE( + perm.CheckOpen(path, access_flags | O_CREAT | O_EXCL, NULL, &unlink)); + ASSERT_TRUE(unlink); +} + +TEST(BrokerFilePermission, ReadWriteCreateUnlink) { + const char kPath[] = "/tmp/good"; + BrokerFilePermission perm = + BrokerFilePermission::ReadWriteCreateUnlink(kPath); + CheckUnlink(perm, kPath, O_RDWR); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +TEST(BrokerFilePermission, ReadWriteCreateUnlinkRecursive) { + const char kPath[] = "/tmp/good/"; + const char kPathFile[] = "/tmp/good/file"; + BrokerFilePermission perm = + BrokerFilePermission::ReadWriteCreateUnlinkRecursive(kPath); + CheckUnlink(perm, kPathFile, O_RDWR); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +TEST(BrokerFilePermission, ValidatePath) { + EXPECT_TRUE(BrokerFilePermissionTester::ValidatePath("/path")); + EXPECT_TRUE(BrokerFilePermissionTester::ValidatePath("/")); + EXPECT_TRUE(BrokerFilePermissionTester::ValidatePath("/..path")); + + EXPECT_FALSE(BrokerFilePermissionTester::ValidatePath("")); + EXPECT_FALSE(BrokerFilePermissionTester::ValidatePath("bad")); + EXPECT_FALSE(BrokerFilePermissionTester::ValidatePath("/bad/")); + EXPECT_FALSE(BrokerFilePermissionTester::ValidatePath("bad/")); + EXPECT_FALSE(BrokerFilePermissionTester::ValidatePath("/bad/..")); + EXPECT_FALSE(BrokerFilePermissionTester::ValidatePath("/bad/../bad")); + EXPECT_FALSE(BrokerFilePermissionTester::ValidatePath("/../bad")); +} + +} // namespace + +} // namespace syscall_broker + +} // namespace sandbox diff --git a/sandbox/linux/syscall_broker/broker_host.cc b/sandbox/linux/syscall_broker/broker_host.cc new file mode 100644 index 0000000000..830b98bf93 --- /dev/null +++ b/sandbox/linux/syscall_broker/broker_host.cc @@ -0,0 +1,231 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/syscall_broker/broker_host.h" + +#include <fcntl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include <string> +#include <vector> + +#include "base/files/scoped_file.h" +#include "base/logging.h" +#include "base/pickle.h" +#include "base/posix/eintr_wrapper.h" +#include "base/posix/unix_domain_socket_linux.h" +#include "sandbox/linux/syscall_broker/broker_common.h" +#include "sandbox/linux/syscall_broker/broker_policy.h" +#include "sandbox/linux/system_headers/linux_syscalls.h" +#include "third_party/valgrind/valgrind.h" + +namespace sandbox { + +namespace syscall_broker { + +namespace { + +bool IsRunningOnValgrind() { + return RUNNING_ON_VALGRIND; +} + +// A little open(2) wrapper to handle some oddities for us. In the general case +// make a direct system call since we want to keep in control of the broker +// process' system calls profile to be able to loosely sandbox it. +int sys_open(const char* pathname, int flags) { + // Hardcode mode to rw------- when creating files. + int mode; + if (flags & O_CREAT) { + mode = 0600; + } else { + mode = 0; + } + if (IsRunningOnValgrind()) { + // Valgrind does not support AT_FDCWD, just use libc's open() in this case. + return open(pathname, flags, mode); + } else { + return syscall(__NR_openat, AT_FDCWD, pathname, flags, mode); + } +} + +// Open |requested_filename| with |flags| if allowed by our policy. +// Write the syscall return value (-errno) to |write_pickle| and append +// a file descriptor to |opened_files| if relevant. +void OpenFileForIPC(const BrokerPolicy& policy, + const std::string& requested_filename, + int flags, + base::Pickle* write_pickle, + std::vector<int>* opened_files) { + DCHECK(write_pickle); + DCHECK(opened_files); + const char* file_to_open = NULL; + bool unlink_after_open = false; + const bool safe_to_open_file = policy.GetFileNameIfAllowedToOpen( + requested_filename.c_str(), flags, &file_to_open, &unlink_after_open); + + if (safe_to_open_file) { + CHECK(file_to_open); + int opened_fd = sys_open(file_to_open, flags); + if (opened_fd < 0) { + write_pickle->WriteInt(-errno); + } else { + // Success. + if (unlink_after_open) { + unlink(file_to_open); + } + opened_files->push_back(opened_fd); + write_pickle->WriteInt(0); + } + } else { + write_pickle->WriteInt(-policy.denied_errno()); + } +} + +// Perform access(2) on |requested_filename| with mode |mode| if allowed by our +// policy. Write the syscall return value (-errno) to |write_pickle|. +void AccessFileForIPC(const BrokerPolicy& policy, + const std::string& requested_filename, + int mode, + base::Pickle* write_pickle) { + DCHECK(write_pickle); + const char* file_to_access = NULL; + const bool safe_to_access_file = policy.GetFileNameIfAllowedToAccess( + requested_filename.c_str(), mode, &file_to_access); + + if (safe_to_access_file) { + CHECK(file_to_access); + int access_ret = access(file_to_access, mode); + int access_errno = errno; + if (!access_ret) + write_pickle->WriteInt(0); + else + write_pickle->WriteInt(-access_errno); + } else { + write_pickle->WriteInt(-policy.denied_errno()); + } +} + +// Handle a |command_type| request contained in |iter| and send the reply +// on |reply_ipc|. +// Currently COMMAND_OPEN and COMMAND_ACCESS are supported. +bool HandleRemoteCommand(const BrokerPolicy& policy, + IPCCommand command_type, + int reply_ipc, + base::PickleIterator iter) { + // Currently all commands have two arguments: filename and flags. + std::string requested_filename; + int flags = 0; + if (!iter.ReadString(&requested_filename) || !iter.ReadInt(&flags)) + return false; + + base::Pickle write_pickle; + std::vector<int> opened_files; + + switch (command_type) { + case COMMAND_ACCESS: + AccessFileForIPC(policy, requested_filename, flags, &write_pickle); + break; + case COMMAND_OPEN: + OpenFileForIPC( + policy, requested_filename, flags, &write_pickle, &opened_files); + break; + default: + LOG(ERROR) << "Invalid IPC command"; + break; + } + + CHECK_LE(write_pickle.size(), kMaxMessageLength); + ssize_t sent = base::UnixDomainSocket::SendMsg( + reply_ipc, write_pickle.data(), write_pickle.size(), opened_files); + + // Close anything we have opened in this process. + for (std::vector<int>::iterator it = opened_files.begin(); + it != opened_files.end(); + ++it) { + int ret = IGNORE_EINTR(close(*it)); + DCHECK(!ret) << "Could not close file descriptor"; + } + + if (sent <= 0) { + LOG(ERROR) << "Could not send IPC reply"; + return false; + } + return true; +} + +} // namespace + +BrokerHost::BrokerHost(const BrokerPolicy& broker_policy, + BrokerChannel::EndPoint ipc_channel) + : broker_policy_(broker_policy), ipc_channel_(ipc_channel.Pass()) { +} + +BrokerHost::~BrokerHost() { +} + +// Handle a request on the IPC channel ipc_channel_. +// A request should have a file descriptor attached on which we will reply and +// that we will then close. +// A request should start with an int that will be used as the command type. +BrokerHost::RequestStatus BrokerHost::HandleRequest() const { + ScopedVector<base::ScopedFD> fds; + char buf[kMaxMessageLength]; + errno = 0; + const ssize_t msg_len = base::UnixDomainSocket::RecvMsg( + ipc_channel_.get(), buf, sizeof(buf), &fds); + + if (msg_len == 0 || (msg_len == -1 && errno == ECONNRESET)) { + // EOF from the client, or the client died, we should die. + return RequestStatus::LOST_CLIENT; + } + + // The client should send exactly one file descriptor, on which we + // will write the reply. + // TODO(mdempsky): ScopedVector doesn't have 'at()', only 'operator[]'. + if (msg_len < 0 || fds.size() != 1 || fds[0]->get() < 0) { + PLOG(ERROR) << "Error reading message from the client"; + return RequestStatus::FAILURE; + } + + base::ScopedFD temporary_ipc(fds[0]->Pass()); + + base::Pickle pickle(buf, msg_len); + base::PickleIterator iter(pickle); + int command_type; + if (iter.ReadInt(&command_type)) { + bool command_handled = false; + // Go through all the possible IPC messages. + switch (command_type) { + case COMMAND_ACCESS: + case COMMAND_OPEN: + // We reply on the file descriptor sent to us via the IPC channel. + command_handled = HandleRemoteCommand( + broker_policy_, static_cast<IPCCommand>(command_type), + temporary_ipc.get(), iter); + break; + default: + NOTREACHED(); + break; + } + + if (command_handled) { + return RequestStatus::SUCCESS; + } else { + return RequestStatus::FAILURE; + } + + NOTREACHED(); + } + + LOG(ERROR) << "Error parsing IPC request"; + return RequestStatus::FAILURE; +} + +} // namespace syscall_broker + +} // namespace sandbox diff --git a/sandbox/linux/syscall_broker/broker_host.h b/sandbox/linux/syscall_broker/broker_host.h new file mode 100644 index 0000000000..9866507d1c --- /dev/null +++ b/sandbox/linux/syscall_broker/broker_host.h @@ -0,0 +1,41 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SYSCALL_BROKER_BROKER_HOST_H_ +#define SANDBOX_LINUX_SYSCALL_BROKER_BROKER_HOST_H_ + +#include "base/macros.h" +#include "sandbox/linux/syscall_broker/broker_channel.h" + +namespace sandbox { + +namespace syscall_broker { + +class BrokerPolicy; + +// The BrokerHost class should be embedded in a (presumably not sandboxed) +// process. It will honor IPC requests from a BrokerClient sent over +// |ipc_channel| according to |broker_policy|. +class BrokerHost { + public: + enum class RequestStatus { LOST_CLIENT = 0, SUCCESS, FAILURE }; + + BrokerHost(const BrokerPolicy& broker_policy, + BrokerChannel::EndPoint ipc_channel); + ~BrokerHost(); + + RequestStatus HandleRequest() const; + + private: + const BrokerPolicy& broker_policy_; + const BrokerChannel::EndPoint ipc_channel_; + + DISALLOW_COPY_AND_ASSIGN(BrokerHost); +}; + +} // namespace syscall_broker + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SYSCALL_BROKER_BROKER_HOST_H_ diff --git a/sandbox/linux/syscall_broker/broker_policy.cc b/sandbox/linux/syscall_broker/broker_policy.cc new file mode 100644 index 0000000000..d9f69e3b81 --- /dev/null +++ b/sandbox/linux/syscall_broker/broker_policy.cc @@ -0,0 +1,99 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/syscall_broker/broker_policy.h" + +#include <fcntl.h> +#include <stdint.h> +#include <string.h> + +#include <string> +#include <vector> + +#include "base/logging.h" +#include "sandbox/linux/syscall_broker/broker_common.h" + +namespace sandbox { +namespace syscall_broker { + +BrokerPolicy::BrokerPolicy(int denied_errno, + const std::vector<BrokerFilePermission>& permissions) + : denied_errno_(denied_errno), + permissions_(permissions), + num_of_permissions_(permissions.size()) { + // The spec guarantees vectors store their elements contiguously + // so set up a pointer to array of element so it can be used + // in async signal safe code instead of vector operations. + if (num_of_permissions_ > 0) { + permissions_array_ = &permissions_[0]; + } else { + permissions_array_ = NULL; + } +} + +BrokerPolicy::~BrokerPolicy() { +} + +// Check if calling access() should be allowed on |requested_filename| with +// mode |requested_mode|. +// Note: access() being a system call to check permissions, this can get a bit +// confusing. We're checking if calling access() should even be allowed with +// the same policy we would use for open(). +// If |file_to_access| is not NULL, we will return the matching pointer from +// the whitelist. For paranoia a caller should then use |file_to_access|. See +// GetFileNameIfAllowedToOpen() for more explanation. +// return true if calling access() on this file should be allowed, false +// otherwise. +// Async signal safe if and only if |file_to_access| is NULL. +bool BrokerPolicy::GetFileNameIfAllowedToAccess( + const char* requested_filename, + int requested_mode, + const char** file_to_access) const { + if (file_to_access && *file_to_access) { + // Make sure that callers never pass a non-empty string. In case callers + // wrongly forget to check the return value and look at the string + // instead, this could catch bugs. + RAW_LOG(FATAL, "*file_to_access should be NULL"); + return false; + } + for (size_t i = 0; i < num_of_permissions_; i++) { + if (permissions_array_[i].CheckAccess(requested_filename, requested_mode, + file_to_access)) { + return true; + } + } + return false; +} + +// Check if |requested_filename| can be opened with flags |requested_flags|. +// If |file_to_open| is not NULL, we will return the matching pointer from the +// whitelist. For paranoia, a caller should then use |file_to_open| rather +// than |requested_filename|, so that it never attempts to open an +// attacker-controlled file name, even if an attacker managed to fool the +// string comparison mechanism. +// Return true if opening should be allowed, false otherwise. +// Async signal safe if and only if |file_to_open| is NULL. +bool BrokerPolicy::GetFileNameIfAllowedToOpen(const char* requested_filename, + int requested_flags, + const char** file_to_open, + bool* unlink_after_open) const { + if (file_to_open && *file_to_open) { + // Make sure that callers never pass a non-empty string. In case callers + // wrongly forget to check the return value and look at the string + // instead, this could catch bugs. + RAW_LOG(FATAL, "*file_to_open should be NULL"); + return false; + } + for (size_t i = 0; i < num_of_permissions_; i++) { + if (permissions_array_[i].CheckOpen(requested_filename, requested_flags, + file_to_open, unlink_after_open)) { + return true; + } + } + return false; +} + +} // namespace syscall_broker + +} // namespace sandbox diff --git a/sandbox/linux/syscall_broker/broker_policy.h b/sandbox/linux/syscall_broker/broker_policy.h new file mode 100644 index 0000000000..d5146edc06 --- /dev/null +++ b/sandbox/linux/syscall_broker/broker_policy.h @@ -0,0 +1,87 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SYSCALL_BROKER_BROKER_POLICY_H_ +#define SANDBOX_LINUX_SYSCALL_BROKER_BROKER_POLICY_H_ + +#include <string> +#include <vector> + +#include "base/macros.h" + +#include "sandbox/linux/syscall_broker/broker_file_permission.h" + +namespace sandbox { +namespace syscall_broker { + +// BrokerPolicy allows to define the security policy enforced by a +// BrokerHost. The BrokerHost will evaluate requests sent over its +// IPC channel according to the BrokerPolicy. +// Some of the methods of this class can be used in an async-signal safe +// way. +class BrokerPolicy { + public: + // |denied_errno| is the error code returned when IPC requests for system + // calls such as open() or access() are denied because a file is not in the + // whitelist. EACCESS would be a typical value. + // |permissions| is a list of BrokerPermission objects that define + // what the broker will allow. + BrokerPolicy(int denied_errno, + const std::vector<BrokerFilePermission>& permissions); + + ~BrokerPolicy(); + + // Check if calling access() should be allowed on |requested_filename| with + // mode |requested_mode|. + // Note: access() being a system call to check permissions, this can get a bit + // confusing. We're checking if calling access() should even be allowed with + // If |file_to_open| is not NULL, a pointer to the path will be returned. + // In the case of a recursive match, this will be the requested_filename, + // otherwise it will return the matching pointer from the + // whitelist. For paranoia a caller should then use |file_to_access|. See + // GetFileNameIfAllowedToOpen() for more explanation. + // return true if calling access() on this file should be allowed, false + // otherwise. + // Async signal safe if and only if |file_to_access| is NULL. + bool GetFileNameIfAllowedToAccess(const char* requested_filename, + int requested_mode, + const char** file_to_access) const; + + // Check if |requested_filename| can be opened with flags |requested_flags|. + // If |file_to_open| is not NULL, a pointer to the path will be returned. + // In the case of a recursive match, this will be the requested_filename, + // otherwise it will return the matching pointer from the + // whitelist. For paranoia, a caller should then use |file_to_open| rather + // than |requested_filename|, so that it never attempts to open an + // attacker-controlled file name, even if an attacker managed to fool the + // string comparison mechanism. + // |unlink_after_open| if not NULL will be set to point to true if the + // policy requests the caller unlink the path after opening. + // Return true if opening should be allowed, false otherwise. + // Async signal safe if and only if |file_to_open| is NULL. + bool GetFileNameIfAllowedToOpen(const char* requested_filename, + int requested_flags, + const char** file_to_open, + bool* unlink_after_open) const; + int denied_errno() const { return denied_errno_; } + + private: + const int denied_errno_; + // The permissions_ vector is used as storage for the BrokerFilePermission + // objects but is not referenced outside of the constructor as + // vectors are unfriendly in async signal safe code. + const std::vector<BrokerFilePermission> permissions_; + // permissions_array_ is set up to point to the backing store of + // permissions_ and is used in async signal safe methods. + const BrokerFilePermission* permissions_array_; + const size_t num_of_permissions_; + + DISALLOW_COPY_AND_ASSIGN(BrokerPolicy); +}; + +} // namespace syscall_broker + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SYSCALL_BROKER_BROKER_POLICY_H_ diff --git a/sandbox/linux/syscall_broker/broker_process.cc b/sandbox/linux/syscall_broker/broker_process.cc new file mode 100644 index 0000000000..81131cc4e0 --- /dev/null +++ b/sandbox/linux/syscall_broker/broker_process.cc @@ -0,0 +1,120 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/syscall_broker/broker_process.h" + +#include <fcntl.h> +#include <signal.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <algorithm> +#include <string> +#include <vector> + +#include "base/callback.h" +#include "base/logging.h" +#include "base/memory/scoped_ptr.h" +#include "base/posix/eintr_wrapper.h" +#include "base/process/process_metrics.h" +#include "build/build_config.h" +#include "sandbox/linux/syscall_broker/broker_channel.h" +#include "sandbox/linux/syscall_broker/broker_client.h" +#include "sandbox/linux/syscall_broker/broker_host.h" + +namespace sandbox { + +namespace syscall_broker { + +BrokerProcess::BrokerProcess( + int denied_errno, + const std::vector<syscall_broker::BrokerFilePermission>& permissions, + bool fast_check_in_client, + bool quiet_failures_for_tests) + : initialized_(false), + fast_check_in_client_(fast_check_in_client), + quiet_failures_for_tests_(quiet_failures_for_tests), + broker_pid_(-1), + policy_(denied_errno, permissions) { +} + +BrokerProcess::~BrokerProcess() { + if (initialized_) { + if (broker_client_.get()) { + // Closing the socket should be enough to notify the child to die, + // unless it has been duplicated. + CloseChannel(); + } + PCHECK(0 == kill(broker_pid_, SIGKILL)); + siginfo_t process_info; + // Reap the child. + int ret = HANDLE_EINTR(waitid(P_PID, broker_pid_, &process_info, WEXITED)); + PCHECK(0 == ret); + } +} + +bool BrokerProcess::Init( + const base::Callback<bool(void)>& broker_process_init_callback) { + CHECK(!initialized_); + BrokerChannel::EndPoint ipc_reader; + BrokerChannel::EndPoint ipc_writer; + BrokerChannel::CreatePair(&ipc_reader, &ipc_writer); + +#if !defined(THREAD_SANITIZER) + DCHECK_EQ(1, base::GetNumberOfThreads(base::GetCurrentProcessHandle())); +#endif + int child_pid = fork(); + if (child_pid == -1) { + return false; + } + if (child_pid) { + // We are the parent and we have just forked our broker process. + ipc_reader.reset(); + broker_pid_ = child_pid; + broker_client_.reset(new BrokerClient(policy_, ipc_writer.Pass(), + fast_check_in_client_, + quiet_failures_for_tests_)); + initialized_ = true; + return true; + } else { + // We are the broker process. Make sure to close the writer's end so that + // we get notified if the client disappears. + ipc_writer.reset(); + CHECK(broker_process_init_callback.Run()); + BrokerHost broker_host(policy_, ipc_reader.Pass()); + for (;;) { + switch (broker_host.HandleRequest()) { + case BrokerHost::RequestStatus::LOST_CLIENT: + _exit(1); + case BrokerHost::RequestStatus::SUCCESS: + case BrokerHost::RequestStatus::FAILURE: + continue; + } + } + _exit(1); + } + NOTREACHED(); + return false; +} + +void BrokerProcess::CloseChannel() { + broker_client_.reset(); +} + +int BrokerProcess::Access(const char* pathname, int mode) const { + RAW_CHECK(initialized_); + return broker_client_->Access(pathname, mode); +} + +int BrokerProcess::Open(const char* pathname, int flags) const { + RAW_CHECK(initialized_); + return broker_client_->Open(pathname, flags); +} + +} // namespace syscall_broker + +} // namespace sandbox. diff --git a/sandbox/linux/syscall_broker/broker_process.h b/sandbox/linux/syscall_broker/broker_process.h new file mode 100644 index 0000000000..8a512a0c12 --- /dev/null +++ b/sandbox/linux/syscall_broker/broker_process.h @@ -0,0 +1,94 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SERVICES_BROKER_PROCESS_H_ +#define SANDBOX_LINUX_SERVICES_BROKER_PROCESS_H_ + +#include <string> +#include <vector> + +#include "base/callback_forward.h" +#include "base/macros.h" +#include "base/memory/scoped_ptr.h" +#include "base/pickle.h" +#include "base/process/process.h" +#include "sandbox/linux/syscall_broker/broker_policy.h" +#include "sandbox/sandbox_export.h" + +namespace sandbox { + +namespace syscall_broker { + +class BrokerClient; +class BrokerFilePermission; + +// Create a new "broker" process to which we can send requests via an IPC +// channel by forking the current process. +// This is a low level IPC mechanism that is suitable to be called from a +// signal handler. +// A process would typically create a broker process before entering +// sandboxing. +// 1. BrokerProcess open_broker(read_whitelist, write_whitelist); +// 2. CHECK(open_broker.Init(NULL)); +// 3. Enable sandbox. +// 4. Use open_broker.Open() to open files. +class SANDBOX_EXPORT BrokerProcess { + public: + // |denied_errno| is the error code returned when methods such as Open() + // or Access() are invoked on a file which is not in the whitelist. EACCESS + // would be a typical value. + // |allowed_r_files| and |allowed_w_files| are white lists of files that can + // be opened later via the Open() API, respectively for reading and writing. + // A file available read-write should be listed in both. + // |fast_check_in_client| and |quiet_failures_for_tests| are reserved for + // unit tests, don't use it. + + BrokerProcess( + int denied_errno, + const std::vector<syscall_broker::BrokerFilePermission>& permissions, + bool fast_check_in_client = true, + bool quiet_failures_for_tests = false); + + ~BrokerProcess(); + // Will initialize the broker process. There should be no threads at this + // point, since we need to fork(). + // broker_process_init_callback will be called in the new broker process, + // after fork() returns. + bool Init(const base::Callback<bool(void)>& broker_process_init_callback); + + // Can be used in place of access(). Will be async signal safe. + // X_OK will always return an error in practice since the broker process + // doesn't support execute permissions. + // It's similar to the access() system call and will return -errno on errors. + int Access(const char* pathname, int mode) const; + // Can be used in place of open(). Will be async signal safe. + // The implementation only supports certain white listed flags and will + // return -EPERM on other flags. + // It's similar to the open() system call and will return -errno on errors. + int Open(const char* pathname, int flags) const; + + int broker_pid() const { return broker_pid_; } + + private: + friend class BrokerProcessTestHelper; + + // Close the IPC channel with the other party. This should only be used + // by tests an none of the class methods should be used afterwards. + void CloseChannel(); + + bool initialized_; // Whether we've been through Init() yet. + const bool fast_check_in_client_; + const bool quiet_failures_for_tests_; + pid_t broker_pid_; // The PID of the broker (child). + syscall_broker::BrokerPolicy policy_; // The sandboxing policy. + scoped_ptr<syscall_broker::BrokerClient> broker_client_; + + DISALLOW_COPY_AND_ASSIGN(BrokerProcess); +}; + +} // namespace syscall_broker + +} // namespace sandbox + +#endif // SANDBOX_LINUX_SERVICES_BROKER_PROCESS_H_ diff --git a/sandbox/linux/syscall_broker/broker_process_unittest.cc b/sandbox/linux/syscall_broker/broker_process_unittest.cc new file mode 100644 index 0000000000..9ad0e719de --- /dev/null +++ b/sandbox/linux/syscall_broker/broker_process_unittest.cc @@ -0,0 +1,656 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/syscall_broker/broker_process.h" + +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <algorithm> +#include <string> +#include <vector> + +#include "base/bind.h" +#include "base/files/file_util.h" +#include "base/files/scoped_file.h" +#include "base/logging.h" +#include "base/memory/scoped_ptr.h" +#include "base/posix/eintr_wrapper.h" +#include "base/posix/unix_domain_socket_linux.h" +#include "sandbox/linux/syscall_broker/broker_client.h" +#include "sandbox/linux/tests/scoped_temporary_file.h" +#include "sandbox/linux/tests/test_utils.h" +#include "sandbox/linux/tests/unit_tests.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace sandbox { + +namespace syscall_broker { + +class BrokerProcessTestHelper { + public: + static void CloseChannel(BrokerProcess* broker) { broker->CloseChannel(); } + // Get the client's IPC descriptor to send IPC requests directly. + // TODO(jln): refator tests to get rid of this. + static int GetIPCDescriptor(const BrokerProcess* broker) { + return broker->broker_client_->GetIPCDescriptor(); + } +}; + +namespace { + +bool NoOpCallback() { + return true; +} + +} // namespace + +TEST(BrokerProcess, CreateAndDestroy) { + std::vector<BrokerFilePermission> permissions; + permissions.push_back(BrokerFilePermission::ReadOnly("/proc/cpuinfo")); + + scoped_ptr<BrokerProcess> open_broker(new BrokerProcess(EPERM, permissions)); + ASSERT_TRUE(open_broker->Init(base::Bind(&NoOpCallback))); + + ASSERT_TRUE(TestUtils::CurrentProcessHasChildren()); + // Destroy the broker and check it has exited properly. + open_broker.reset(); + ASSERT_FALSE(TestUtils::CurrentProcessHasChildren()); +} + +TEST(BrokerProcess, TestOpenAccessNull) { + std::vector<BrokerFilePermission> empty; + BrokerProcess open_broker(EPERM, empty); + ASSERT_TRUE(open_broker.Init(base::Bind(&NoOpCallback))); + + int fd = open_broker.Open(NULL, O_RDONLY); + ASSERT_EQ(fd, -EFAULT); + + int ret = open_broker.Access(NULL, F_OK); + ASSERT_EQ(ret, -EFAULT); +} + +void TestOpenFilePerms(bool fast_check_in_client, int denied_errno) { + const char kR_WhiteListed[] = "/proc/DOESNOTEXIST1"; + // We can't debug the init process, and shouldn't be able to access + // its auxv file. + const char kR_WhiteListedButDenied[] = "/proc/1/auxv"; + const char kW_WhiteListed[] = "/proc/DOESNOTEXIST2"; + const char kRW_WhiteListed[] = "/proc/DOESNOTEXIST3"; + const char k_NotWhitelisted[] = "/proc/DOESNOTEXIST4"; + + std::vector<BrokerFilePermission> permissions; + permissions.push_back(BrokerFilePermission::ReadOnly(kR_WhiteListed)); + permissions.push_back( + BrokerFilePermission::ReadOnly(kR_WhiteListedButDenied)); + permissions.push_back(BrokerFilePermission::WriteOnly(kW_WhiteListed)); + permissions.push_back(BrokerFilePermission::ReadWrite(kRW_WhiteListed)); + + BrokerProcess open_broker(denied_errno, permissions, fast_check_in_client); + ASSERT_TRUE(open_broker.Init(base::Bind(&NoOpCallback))); + + int fd = -1; + fd = open_broker.Open(kR_WhiteListed, O_RDONLY); + ASSERT_EQ(fd, -ENOENT); + fd = open_broker.Open(kR_WhiteListed, O_WRONLY); + ASSERT_EQ(fd, -denied_errno); + fd = open_broker.Open(kR_WhiteListed, O_RDWR); + ASSERT_EQ(fd, -denied_errno); + int ret = -1; + ret = open_broker.Access(kR_WhiteListed, F_OK); + ASSERT_EQ(ret, -ENOENT); + ret = open_broker.Access(kR_WhiteListed, R_OK); + ASSERT_EQ(ret, -ENOENT); + ret = open_broker.Access(kR_WhiteListed, W_OK); + ASSERT_EQ(ret, -denied_errno); + ret = open_broker.Access(kR_WhiteListed, R_OK | W_OK); + ASSERT_EQ(ret, -denied_errno); + ret = open_broker.Access(kR_WhiteListed, X_OK); + ASSERT_EQ(ret, -denied_errno); + ret = open_broker.Access(kR_WhiteListed, R_OK | X_OK); + ASSERT_EQ(ret, -denied_errno); + + // Android sometimes runs tests as root. + // This part of the test requires a process that doesn't have + // CAP_DAC_OVERRIDE. We check against a root euid as a proxy for that. + if (geteuid()) { + fd = open_broker.Open(kR_WhiteListedButDenied, O_RDONLY); + // The broker process will allow this, but the normal permission system + // won't. + ASSERT_EQ(fd, -EACCES); + fd = open_broker.Open(kR_WhiteListedButDenied, O_WRONLY); + ASSERT_EQ(fd, -denied_errno); + fd = open_broker.Open(kR_WhiteListedButDenied, O_RDWR); + ASSERT_EQ(fd, -denied_errno); + ret = open_broker.Access(kR_WhiteListedButDenied, F_OK); + // The normal permission system will let us check that the file exists. + ASSERT_EQ(ret, 0); + ret = open_broker.Access(kR_WhiteListedButDenied, R_OK); + ASSERT_EQ(ret, -EACCES); + ret = open_broker.Access(kR_WhiteListedButDenied, W_OK); + ASSERT_EQ(ret, -denied_errno); + ret = open_broker.Access(kR_WhiteListedButDenied, R_OK | W_OK); + ASSERT_EQ(ret, -denied_errno); + ret = open_broker.Access(kR_WhiteListedButDenied, X_OK); + ASSERT_EQ(ret, -denied_errno); + ret = open_broker.Access(kR_WhiteListedButDenied, R_OK | X_OK); + ASSERT_EQ(ret, -denied_errno); + } + + fd = open_broker.Open(kW_WhiteListed, O_RDONLY); + ASSERT_EQ(fd, -denied_errno); + fd = open_broker.Open(kW_WhiteListed, O_WRONLY); + ASSERT_EQ(fd, -ENOENT); + fd = open_broker.Open(kW_WhiteListed, O_RDWR); + ASSERT_EQ(fd, -denied_errno); + ret = open_broker.Access(kW_WhiteListed, F_OK); + ASSERT_EQ(ret, -ENOENT); + ret = open_broker.Access(kW_WhiteListed, R_OK); + ASSERT_EQ(ret, -denied_errno); + ret = open_broker.Access(kW_WhiteListed, W_OK); + ASSERT_EQ(ret, -ENOENT); + ret = open_broker.Access(kW_WhiteListed, R_OK | W_OK); + ASSERT_EQ(ret, -denied_errno); + ret = open_broker.Access(kW_WhiteListed, X_OK); + ASSERT_EQ(ret, -denied_errno); + ret = open_broker.Access(kW_WhiteListed, R_OK | X_OK); + ASSERT_EQ(ret, -denied_errno); + + fd = open_broker.Open(kRW_WhiteListed, O_RDONLY); + ASSERT_EQ(fd, -ENOENT); + fd = open_broker.Open(kRW_WhiteListed, O_WRONLY); + ASSERT_EQ(fd, -ENOENT); + fd = open_broker.Open(kRW_WhiteListed, O_RDWR); + ASSERT_EQ(fd, -ENOENT); + ret = open_broker.Access(kRW_WhiteListed, F_OK); + ASSERT_EQ(ret, -ENOENT); + ret = open_broker.Access(kRW_WhiteListed, R_OK); + ASSERT_EQ(ret, -ENOENT); + ret = open_broker.Access(kRW_WhiteListed, W_OK); + ASSERT_EQ(ret, -ENOENT); + ret = open_broker.Access(kRW_WhiteListed, R_OK | W_OK); + ASSERT_EQ(ret, -ENOENT); + ret = open_broker.Access(kRW_WhiteListed, X_OK); + ASSERT_EQ(ret, -denied_errno); + ret = open_broker.Access(kRW_WhiteListed, R_OK | X_OK); + ASSERT_EQ(ret, -denied_errno); + + fd = open_broker.Open(k_NotWhitelisted, O_RDONLY); + ASSERT_EQ(fd, -denied_errno); + fd = open_broker.Open(k_NotWhitelisted, O_WRONLY); + ASSERT_EQ(fd, -denied_errno); + fd = open_broker.Open(k_NotWhitelisted, O_RDWR); + ASSERT_EQ(fd, -denied_errno); + ret = open_broker.Access(k_NotWhitelisted, F_OK); + ASSERT_EQ(ret, -denied_errno); + ret = open_broker.Access(k_NotWhitelisted, R_OK); + ASSERT_EQ(ret, -denied_errno); + ret = open_broker.Access(k_NotWhitelisted, W_OK); + ASSERT_EQ(ret, -denied_errno); + ret = open_broker.Access(k_NotWhitelisted, R_OK | W_OK); + ASSERT_EQ(ret, -denied_errno); + ret = open_broker.Access(k_NotWhitelisted, X_OK); + ASSERT_EQ(ret, -denied_errno); + ret = open_broker.Access(k_NotWhitelisted, R_OK | X_OK); + ASSERT_EQ(ret, -denied_errno); + + // We have some extra sanity check for clearly wrong values. + fd = open_broker.Open(kRW_WhiteListed, O_RDONLY | O_WRONLY | O_RDWR); + ASSERT_EQ(fd, -denied_errno); + + // It makes no sense to allow O_CREAT in a 2-parameters open. Ensure this + // is denied. + fd = open_broker.Open(kRW_WhiteListed, O_RDWR | O_CREAT); + ASSERT_EQ(fd, -denied_errno); +} + +// Run the same thing twice. The second time, we make sure that no security +// check is performed on the client. +TEST(BrokerProcess, OpenFilePermsWithClientCheck) { + TestOpenFilePerms(true /* fast_check_in_client */, EPERM); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +TEST(BrokerProcess, OpenOpenFilePermsNoClientCheck) { + TestOpenFilePerms(false /* fast_check_in_client */, EPERM); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +// Run the same twice again, but with ENOENT instead of EPERM. +TEST(BrokerProcess, OpenFilePermsWithClientCheckNoEnt) { + TestOpenFilePerms(true /* fast_check_in_client */, ENOENT); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +TEST(BrokerProcess, OpenOpenFilePermsNoClientCheckNoEnt) { + TestOpenFilePerms(false /* fast_check_in_client */, ENOENT); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +void TestBadPaths(bool fast_check_in_client) { + const char kFileCpuInfo[] = "/proc/cpuinfo"; + const char kNotAbsPath[] = "proc/cpuinfo"; + const char kDotDotStart[] = "/../proc/cpuinfo"; + const char kDotDotMiddle[] = "/proc/self/../cpuinfo"; + const char kDotDotEnd[] = "/proc/.."; + const char kTrailingSlash[] = "/proc/"; + + std::vector<BrokerFilePermission> permissions; + + permissions.push_back(BrokerFilePermission::ReadOnlyRecursive("/proc/")); + scoped_ptr<BrokerProcess> open_broker( + new BrokerProcess(EPERM, permissions, fast_check_in_client)); + ASSERT_TRUE(open_broker->Init(base::Bind(&NoOpCallback))); + // Open cpuinfo via the broker. + int cpuinfo_fd = open_broker->Open(kFileCpuInfo, O_RDONLY); + base::ScopedFD cpuinfo_fd_closer(cpuinfo_fd); + ASSERT_GE(cpuinfo_fd, 0); + + int fd = -1; + int can_access; + + can_access = open_broker->Access(kNotAbsPath, R_OK); + ASSERT_EQ(can_access, -EPERM); + fd = open_broker->Open(kNotAbsPath, O_RDONLY); + ASSERT_EQ(fd, -EPERM); + + can_access = open_broker->Access(kDotDotStart, R_OK); + ASSERT_EQ(can_access, -EPERM); + fd = open_broker->Open(kDotDotStart, O_RDONLY); + ASSERT_EQ(fd, -EPERM); + + can_access = open_broker->Access(kDotDotMiddle, R_OK); + ASSERT_EQ(can_access, -EPERM); + fd = open_broker->Open(kDotDotMiddle, O_RDONLY); + ASSERT_EQ(fd, -EPERM); + + can_access = open_broker->Access(kDotDotEnd, R_OK); + ASSERT_EQ(can_access, -EPERM); + fd = open_broker->Open(kDotDotEnd, O_RDONLY); + ASSERT_EQ(fd, -EPERM); + + can_access = open_broker->Access(kTrailingSlash, R_OK); + ASSERT_EQ(can_access, -EPERM); + fd = open_broker->Open(kTrailingSlash, O_RDONLY); + ASSERT_EQ(fd, -EPERM); +} + +TEST(BrokerProcess, BadPathsClientCheck) { + TestBadPaths(true /* fast_check_in_client */); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +TEST(BrokerProcess, BadPathsNoClientCheck) { + TestBadPaths(false /* fast_check_in_client */); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +void TestOpenCpuinfo(bool fast_check_in_client, bool recursive) { + const char kFileCpuInfo[] = "/proc/cpuinfo"; + const char kDirProc[] = "/proc/"; + + std::vector<BrokerFilePermission> permissions; + if (recursive) + permissions.push_back(BrokerFilePermission::ReadOnlyRecursive(kDirProc)); + else + permissions.push_back(BrokerFilePermission::ReadOnly(kFileCpuInfo)); + + scoped_ptr<BrokerProcess> open_broker( + new BrokerProcess(EPERM, permissions, fast_check_in_client)); + ASSERT_TRUE(open_broker->Init(base::Bind(&NoOpCallback))); + + int fd = -1; + fd = open_broker->Open(kFileCpuInfo, O_RDWR); + base::ScopedFD fd_closer(fd); + ASSERT_EQ(fd, -EPERM); + + // Check we can read /proc/cpuinfo. + int can_access = open_broker->Access(kFileCpuInfo, R_OK); + ASSERT_EQ(can_access, 0); + can_access = open_broker->Access(kFileCpuInfo, W_OK); + ASSERT_EQ(can_access, -EPERM); + // Check we can not write /proc/cpuinfo. + + // Open cpuinfo via the broker. + int cpuinfo_fd = open_broker->Open(kFileCpuInfo, O_RDONLY); + base::ScopedFD cpuinfo_fd_closer(cpuinfo_fd); + ASSERT_GE(cpuinfo_fd, 0); + char buf[3]; + memset(buf, 0, sizeof(buf)); + int read_len1 = read(cpuinfo_fd, buf, sizeof(buf)); + ASSERT_GT(read_len1, 0); + + // Open cpuinfo directly. + int cpuinfo_fd2 = open(kFileCpuInfo, O_RDONLY); + base::ScopedFD cpuinfo_fd2_closer(cpuinfo_fd2); + ASSERT_GE(cpuinfo_fd2, 0); + char buf2[3]; + memset(buf2, 1, sizeof(buf2)); + int read_len2 = read(cpuinfo_fd2, buf2, sizeof(buf2)); + ASSERT_GT(read_len1, 0); + + // The following is not guaranteed true, but will be in practice. + ASSERT_EQ(read_len1, read_len2); + // Compare the cpuinfo as returned by the broker with the one we opened + // ourselves. + ASSERT_EQ(memcmp(buf, buf2, read_len1), 0); + + ASSERT_TRUE(TestUtils::CurrentProcessHasChildren()); + open_broker.reset(); + ASSERT_FALSE(TestUtils::CurrentProcessHasChildren()); +} + +// Run this test 4 times. With and without the check in client +// and using a recursive path. +TEST(BrokerProcess, OpenCpuinfoWithClientCheck) { + TestOpenCpuinfo(true /* fast_check_in_client */, false /* not recursive */); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +TEST(BrokerProcess, OpenCpuinfoNoClientCheck) { + TestOpenCpuinfo(false /* fast_check_in_client */, false /* not recursive */); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +TEST(BrokerProcess, OpenCpuinfoWithClientCheckRecursive) { + TestOpenCpuinfo(true /* fast_check_in_client */, true /* recursive */); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +TEST(BrokerProcess, OpenCpuinfoNoClientCheckRecursive) { + TestOpenCpuinfo(false /* fast_check_in_client */, true /* recursive */); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +TEST(BrokerProcess, OpenFileRW) { + ScopedTemporaryFile tempfile; + const char* tempfile_name = tempfile.full_file_name(); + + std::vector<BrokerFilePermission> permissions; + permissions.push_back(BrokerFilePermission::ReadWrite(tempfile_name)); + + BrokerProcess open_broker(EPERM, permissions); + ASSERT_TRUE(open_broker.Init(base::Bind(&NoOpCallback))); + + // Check we can access that file with read or write. + int can_access = open_broker.Access(tempfile_name, R_OK | W_OK); + ASSERT_EQ(can_access, 0); + + int tempfile2 = -1; + tempfile2 = open_broker.Open(tempfile_name, O_RDWR); + ASSERT_GE(tempfile2, 0); + + // Write to the descriptor opened by the broker. + char test_text[] = "TESTTESTTEST"; + ssize_t len = write(tempfile2, test_text, sizeof(test_text)); + ASSERT_EQ(len, static_cast<ssize_t>(sizeof(test_text))); + + // Read back from the original file descriptor what we wrote through + // the descriptor provided by the broker. + char buf[1024]; + len = read(tempfile.fd(), buf, sizeof(buf)); + + ASSERT_EQ(len, static_cast<ssize_t>(sizeof(test_text))); + ASSERT_EQ(memcmp(test_text, buf, sizeof(test_text)), 0); + + ASSERT_EQ(close(tempfile2), 0); +} + +// SANDBOX_TEST because the process could die with a SIGPIPE +// and we want this to happen in a subprocess. +SANDBOX_TEST(BrokerProcess, BrokerDied) { + const char kCpuInfo[] = "/proc/cpuinfo"; + std::vector<BrokerFilePermission> permissions; + permissions.push_back(BrokerFilePermission::ReadOnly(kCpuInfo)); + + BrokerProcess open_broker(EPERM, permissions, true /* fast_check_in_client */, + true /* quiet_failures_for_tests */); + SANDBOX_ASSERT(open_broker.Init(base::Bind(&NoOpCallback))); + const pid_t broker_pid = open_broker.broker_pid(); + SANDBOX_ASSERT(kill(broker_pid, SIGKILL) == 0); + + // Now we check that the broker has been signaled, but do not reap it. + siginfo_t process_info; + SANDBOX_ASSERT(HANDLE_EINTR(waitid( + P_PID, broker_pid, &process_info, WEXITED | WNOWAIT)) == + 0); + SANDBOX_ASSERT(broker_pid == process_info.si_pid); + SANDBOX_ASSERT(CLD_KILLED == process_info.si_code); + SANDBOX_ASSERT(SIGKILL == process_info.si_status); + + // Check that doing Open with a dead broker won't SIGPIPE us. + SANDBOX_ASSERT(open_broker.Open(kCpuInfo, O_RDONLY) == -ENOMEM); + SANDBOX_ASSERT(open_broker.Access(kCpuInfo, O_RDONLY) == -ENOMEM); +} + +void TestOpenComplexFlags(bool fast_check_in_client) { + const char kCpuInfo[] = "/proc/cpuinfo"; + std::vector<BrokerFilePermission> permissions; + permissions.push_back(BrokerFilePermission::ReadOnly(kCpuInfo)); + + BrokerProcess open_broker(EPERM, permissions, fast_check_in_client); + ASSERT_TRUE(open_broker.Init(base::Bind(&NoOpCallback))); + // Test that we do the right thing for O_CLOEXEC and O_NONBLOCK. + int fd = -1; + int ret = 0; + fd = open_broker.Open(kCpuInfo, O_RDONLY); + ASSERT_GE(fd, 0); + ret = fcntl(fd, F_GETFL); + ASSERT_NE(-1, ret); + // The descriptor shouldn't have the O_CLOEXEC attribute, nor O_NONBLOCK. + ASSERT_EQ(0, ret & (O_CLOEXEC | O_NONBLOCK)); + ASSERT_EQ(0, close(fd)); + + fd = open_broker.Open(kCpuInfo, O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0); + ret = fcntl(fd, F_GETFD); + ASSERT_NE(-1, ret); + // Important: use F_GETFD, not F_GETFL. The O_CLOEXEC flag in F_GETFL + // is actually not used by the kernel. + ASSERT_TRUE(FD_CLOEXEC & ret); + ASSERT_EQ(0, close(fd)); + + fd = open_broker.Open(kCpuInfo, O_RDONLY | O_NONBLOCK); + ASSERT_GE(fd, 0); + ret = fcntl(fd, F_GETFL); + ASSERT_NE(-1, ret); + ASSERT_TRUE(O_NONBLOCK & ret); + ASSERT_EQ(0, close(fd)); +} + +TEST(BrokerProcess, OpenComplexFlagsWithClientCheck) { + TestOpenComplexFlags(true /* fast_check_in_client */); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +TEST(BrokerProcess, OpenComplexFlagsNoClientCheck) { + TestOpenComplexFlags(false /* fast_check_in_client */); + // Don't do anything here, so that ASSERT works in the subfunction as + // expected. +} + +// We need to allow noise because the broker will log when it receives our +// bogus IPCs. +SANDBOX_TEST_ALLOW_NOISE(BrokerProcess, RecvMsgDescriptorLeak) { + // Android creates a socket on first use of the LOG call. + // We need to ensure this socket is open before we + // begin the test. + LOG(INFO) << "Ensure Android LOG socket is allocated"; + + // Find the four lowest available file descriptors. + int available_fds[4]; + SANDBOX_ASSERT(0 == pipe(available_fds)); + SANDBOX_ASSERT(0 == pipe(available_fds + 2)); + + // Save one FD to send to the broker later, and close the others. + base::ScopedFD message_fd(available_fds[0]); + for (size_t i = 1; i < arraysize(available_fds); i++) { + SANDBOX_ASSERT(0 == IGNORE_EINTR(close(available_fds[i]))); + } + + // Lower our file descriptor limit to just allow three more file descriptors + // to be allocated. (N.B., RLIMIT_NOFILE doesn't limit the number of file + // descriptors a process can have: it only limits the highest value that can + // be assigned to newly-created descriptors allocated by the process.) + const rlim_t fd_limit = + 1 + + *std::max_element(available_fds, + available_fds + arraysize(available_fds)); + + // Valgrind doesn't allow changing the hard descriptor limit, so we only + // change the soft descriptor limit here. + struct rlimit rlim; + SANDBOX_ASSERT(0 == getrlimit(RLIMIT_NOFILE, &rlim)); + SANDBOX_ASSERT(fd_limit <= rlim.rlim_cur); + rlim.rlim_cur = fd_limit; + SANDBOX_ASSERT(0 == setrlimit(RLIMIT_NOFILE, &rlim)); + + static const char kCpuInfo[] = "/proc/cpuinfo"; + std::vector<BrokerFilePermission> permissions; + permissions.push_back(BrokerFilePermission::ReadOnly(kCpuInfo)); + + BrokerProcess open_broker(EPERM, permissions); + SANDBOX_ASSERT(open_broker.Init(base::Bind(&NoOpCallback))); + + const int ipc_fd = BrokerProcessTestHelper::GetIPCDescriptor(&open_broker); + SANDBOX_ASSERT(ipc_fd >= 0); + + static const char kBogus[] = "not a pickle"; + std::vector<int> fds; + fds.push_back(message_fd.get()); + + // The broker process should only have a couple spare file descriptors + // available, but for good measure we send it fd_limit bogus IPCs anyway. + for (rlim_t i = 0; i < fd_limit; ++i) { + SANDBOX_ASSERT( + base::UnixDomainSocket::SendMsg(ipc_fd, kBogus, sizeof(kBogus), fds)); + } + + const int fd = open_broker.Open(kCpuInfo, O_RDONLY); + SANDBOX_ASSERT(fd >= 0); + SANDBOX_ASSERT(0 == IGNORE_EINTR(close(fd))); +} + +bool CloseFD(int fd) { + PCHECK(0 == IGNORE_EINTR(close(fd))); + return true; +} + +// Return true if the other end of the |reader| pipe was closed, +// false if |timeout_in_seconds| was reached or another event +// or error occured. +bool WaitForClosedPipeWriter(int reader, int timeout_in_ms) { + struct pollfd poll_fd = {reader, POLLIN | POLLRDHUP, 0}; + const int num_events = HANDLE_EINTR(poll(&poll_fd, 1, timeout_in_ms)); + if (1 == num_events && poll_fd.revents | POLLHUP) + return true; + return false; +} + +// Closing the broker client's IPC channel should terminate the broker +// process. +TEST(BrokerProcess, BrokerDiesOnClosedChannel) { + std::vector<BrokerFilePermission> permissions; + permissions.push_back(BrokerFilePermission::ReadOnly("/proc/cpuinfo")); + + // Get the writing end of a pipe into the broker (child) process so + // that we can reliably detect when it dies. + int lifeline_fds[2]; + PCHECK(0 == pipe(lifeline_fds)); + + BrokerProcess open_broker(EPERM, permissions, true /* fast_check_in_client */, + false /* quiet_failures_for_tests */); + ASSERT_TRUE(open_broker.Init(base::Bind(&CloseFD, lifeline_fds[0]))); + // Make sure the writing end only exists in the broker process. + CloseFD(lifeline_fds[1]); + base::ScopedFD reader(lifeline_fds[0]); + + const pid_t broker_pid = open_broker.broker_pid(); + + // This should cause the broker process to exit. + BrokerProcessTestHelper::CloseChannel(&open_broker); + + const int kTimeoutInMilliseconds = 5000; + const bool broker_lifeline_closed = + WaitForClosedPipeWriter(reader.get(), kTimeoutInMilliseconds); + // If the broker exited, its lifeline fd should be closed. + ASSERT_TRUE(broker_lifeline_closed); + // Now check that the broker has exited, but do not reap it. + siginfo_t process_info; + ASSERT_EQ(0, HANDLE_EINTR(waitid(P_PID, broker_pid, &process_info, + WEXITED | WNOWAIT))); + EXPECT_EQ(broker_pid, process_info.si_pid); + EXPECT_EQ(CLD_EXITED, process_info.si_code); + EXPECT_EQ(1, process_info.si_status); +} + +TEST(BrokerProcess, CreateFile) { + std::string temp_str; + { + ScopedTemporaryFile tmp_file; + temp_str = tmp_file.full_file_name(); + } + const char* tempfile_name = temp_str.c_str(); + + std::vector<BrokerFilePermission> permissions; + permissions.push_back(BrokerFilePermission::ReadWriteCreate(tempfile_name)); + + BrokerProcess open_broker(EPERM, permissions); + ASSERT_TRUE(open_broker.Init(base::Bind(&NoOpCallback))); + + int fd = -1; + + // Try without O_EXCL + fd = open_broker.Open(tempfile_name, O_RDWR | O_CREAT); + ASSERT_EQ(fd, -EPERM); + + const char kTestText[] = "TESTTESTTEST"; + // Create a file + fd = open_broker.Open(tempfile_name, O_RDWR | O_CREAT | O_EXCL); + ASSERT_GE(fd, 0); + { + base::ScopedFD scoped_fd(fd); + + // Confirm fail if file exists + int bad_fd = open_broker.Open(tempfile_name, O_RDWR | O_CREAT | O_EXCL); + ASSERT_EQ(bad_fd, -EEXIST); + + // Write to the descriptor opened by the broker. + + ssize_t len = HANDLE_EINTR(write(fd, kTestText, sizeof(kTestText))); + ASSERT_EQ(len, static_cast<ssize_t>(sizeof(kTestText))); + } + + int fd_check = open(tempfile_name, O_RDONLY); + ASSERT_GE(fd_check, 0); + { + base::ScopedFD scoped_fd(fd_check); + char buf[1024]; + ssize_t len = HANDLE_EINTR(read(fd_check, buf, sizeof(buf))); + + ASSERT_EQ(len, static_cast<ssize_t>(sizeof(kTestText))); + ASSERT_EQ(memcmp(kTestText, buf, sizeof(kTestText)), 0); + } +} + +} // namespace syscall_broker + +} // namespace sandbox |