diff options
Diffstat (limited to 'sandbox/linux/bpf_dsl/codegen_unittest.cc')
-rw-r--r-- | sandbox/linux/bpf_dsl/codegen_unittest.cc | 402 |
1 files changed, 402 insertions, 0 deletions
diff --git a/sandbox/linux/bpf_dsl/codegen_unittest.cc b/sandbox/linux/bpf_dsl/codegen_unittest.cc new file mode 100644 index 0000000000..5961822123 --- /dev/null +++ b/sandbox/linux/bpf_dsl/codegen_unittest.cc @@ -0,0 +1,402 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sandbox/linux/bpf_dsl/codegen.h" + +#include <map> +#include <utility> +#include <vector> + +#include "base/macros.h" +#include "base/md5.h" +#include "base/strings/string_piece.h" +#include "sandbox/linux/system_headers/linux_filter.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace sandbox { +namespace { + +// Hash provides an abstraction for building "hash trees" from BPF +// control flow graphs, and efficiently identifying equivalent graphs. +// +// For simplicity, we use MD5, because base happens to provide a +// convenient API for its use. However, any collision-resistant hash +// should suffice. +class Hash { + public: + static const Hash kZero; + + Hash() : digest_() {} + + Hash(uint16_t code, + uint32_t k, + const Hash& jt = kZero, + const Hash& jf = kZero) + : digest_() { + base::MD5Context ctx; + base::MD5Init(&ctx); + HashValue(&ctx, code); + HashValue(&ctx, k); + HashValue(&ctx, jt); + HashValue(&ctx, jf); + base::MD5Final(&digest_, &ctx); + } + + Hash(const Hash& hash) = default; + Hash& operator=(const Hash& rhs) = default; + + friend bool operator==(const Hash& lhs, const Hash& rhs) { + return lhs.Base16() == rhs.Base16(); + } + friend bool operator!=(const Hash& lhs, const Hash& rhs) { + return !(lhs == rhs); + } + + private: + template <typename T> + void HashValue(base::MD5Context* ctx, const T& value) { + base::MD5Update(ctx, + base::StringPiece(reinterpret_cast<const char*>(&value), + sizeof(value))); + } + + std::string Base16() const { + return base::MD5DigestToBase16(digest_); + } + + base::MD5Digest digest_; +}; + +const Hash Hash::kZero; + +// Sanity check that equality and inequality work on Hash as required. +TEST(CodeGen, HashSanity) { + std::vector<Hash> hashes; + + // Push a bunch of logically distinct hashes. + hashes.push_back(Hash::kZero); + for (int i = 0; i < 4; ++i) { + hashes.push_back(Hash(i & 1, i & 2)); + } + for (int i = 0; i < 16; ++i) { + hashes.push_back(Hash(i & 1, i & 2, Hash(i & 4, i & 8))); + } + for (int i = 0; i < 64; ++i) { + hashes.push_back( + Hash(i & 1, i & 2, Hash(i & 4, i & 8), Hash(i & 16, i & 32))); + } + + for (const Hash& a : hashes) { + for (const Hash& b : hashes) { + // Hashes should equal themselves, but not equal all others. + if (&a == &b) { + EXPECT_EQ(a, b); + } else { + EXPECT_NE(a, b); + } + } + } +} + +// ProgramTest provides a fixture for writing compiling sample +// programs with CodeGen and verifying the linearized output matches +// the input DAG. +class ProgramTest : public ::testing::Test { + protected: + ProgramTest() : gen_(), node_hashes_() {} + + // MakeInstruction calls CodeGen::MakeInstruction() and associated + // the returned address with a hash of the instruction. + CodeGen::Node MakeInstruction(uint16_t code, + uint32_t k, + CodeGen::Node jt = CodeGen::kNullNode, + CodeGen::Node jf = CodeGen::kNullNode) { + CodeGen::Node res = gen_.MakeInstruction(code, k, jt, jf); + EXPECT_NE(CodeGen::kNullNode, res); + + Hash digest(code, k, Lookup(jt), Lookup(jf)); + auto it = node_hashes_.insert(std::make_pair(res, digest)); + EXPECT_EQ(digest, it.first->second); + + return res; + } + + // RunTest compiles the program and verifies that the output matches + // what is expected. It should be called at the end of each program + // test case. + void RunTest(CodeGen::Node head) { + // Compile the program + CodeGen::Program program; + gen_.Compile(head, &program); + + // Walk the program backwards, and compute the hash for each instruction. + std::vector<Hash> prog_hashes(program.size()); + for (size_t i = program.size(); i > 0; --i) { + const sock_filter& insn = program.at(i - 1); + Hash& hash = prog_hashes.at(i - 1); + + if (BPF_CLASS(insn.code) == BPF_JMP) { + if (BPF_OP(insn.code) == BPF_JA) { + // The compiler adds JA instructions as needed, so skip them. + hash = prog_hashes.at(i + insn.k); + } else { + hash = Hash(insn.code, insn.k, prog_hashes.at(i + insn.jt), + prog_hashes.at(i + insn.jf)); + } + } else if (BPF_CLASS(insn.code) == BPF_RET) { + hash = Hash(insn.code, insn.k); + } else { + hash = Hash(insn.code, insn.k, prog_hashes.at(i)); + } + } + + EXPECT_EQ(Lookup(head), prog_hashes.at(0)); + } + + private: + const Hash& Lookup(CodeGen::Node next) const { + if (next == CodeGen::kNullNode) { + return Hash::kZero; + } + auto it = node_hashes_.find(next); + if (it == node_hashes_.end()) { + ADD_FAILURE() << "No hash found for node " << next; + return Hash::kZero; + } + return it->second; + } + + CodeGen gen_; + std::map<CodeGen::Node, Hash> node_hashes_; + + DISALLOW_COPY_AND_ASSIGN(ProgramTest); +}; + +TEST_F(ProgramTest, OneInstruction) { + // Create the most basic valid BPF program: + // RET 0 + CodeGen::Node head = MakeInstruction(BPF_RET + BPF_K, 0); + RunTest(head); +} + +TEST_F(ProgramTest, SimpleBranch) { + // Create a program with a single branch: + // JUMP if eq 42 then $0 else $1 + // 0: RET 1 + // 1: RET 0 + CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, + MakeInstruction(BPF_RET + BPF_K, 1), + MakeInstruction(BPF_RET + BPF_K, 0)); + RunTest(head); +} + +TEST_F(ProgramTest, AtypicalBranch) { + // Create a program with a single branch: + // JUMP if eq 42 then $0 else $0 + // 0: RET 0 + + CodeGen::Node ret = MakeInstruction(BPF_RET + BPF_K, 0); + CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret); + + // N.B.: As the instructions in both sides of the branch are already + // the same object, we do not actually have any "mergeable" branches. + // This needs to be reflected in our choice of "flags". + RunTest(head); +} + +TEST_F(ProgramTest, Complex) { + // Creates a basic BPF program that we'll use to test some of the code: + // JUMP if eq 42 the $0 else $1 (insn6) + // 0: LD 23 (insn5) + // 1: JUMP if eq 42 then $2 else $4 (insn4) + // 2: JUMP to $3 (insn2) + // 3: LD 42 (insn1) + // RET 42 (insn0) + // 4: LD 42 (insn3) + // RET 42 (insn3+) + CodeGen::Node insn0 = MakeInstruction(BPF_RET + BPF_K, 42); + CodeGen::Node insn1 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, insn0); + CodeGen::Node insn2 = insn1; // Implicit JUMP + + // We explicitly duplicate instructions to test that they're merged. + CodeGen::Node insn3 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, + MakeInstruction(BPF_RET + BPF_K, 42)); + EXPECT_EQ(insn2, insn3); + + CodeGen::Node insn4 = + MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn2, insn3); + CodeGen::Node insn5 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 23, insn4); + + // Force a basic block that ends in neither a jump instruction nor a return + // instruction. It only contains "insn5". This exercises one of the less + // common code paths in the topo-sort algorithm. + // This also gives us a diamond-shaped pattern in our graph, which stresses + // another aspect of the topo-sort algorithm (namely, the ability to + // correctly count the incoming branches for subtrees that are not disjunct). + CodeGen::Node insn6 = + MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4); + + RunTest(insn6); +} + +TEST_F(ProgramTest, ConfusingTails) { + // This simple program demonstrates https://crbug.com/351103/ + // The two "LOAD 0" instructions are blocks of their own. MergeTails() could + // be tempted to merge them since they are the same. However, they are + // not mergeable because they fall-through to non semantically equivalent + // blocks. + // Without the fix for this bug, this program should trigger the check in + // CompileAndCompare: the serialized graphs from the program and its compiled + // version will differ. + // + // 0) LOAD 1 // ??? + // 1) if A == 0x1; then JMP 2 else JMP 3 + // 2) LOAD 0 // System call number + // 3) if A == 0x2; then JMP 4 else JMP 5 + // 4) LOAD 0 // System call number + // 5) if A == 0x1; then JMP 6 else JMP 7 + // 6) RET 0 + // 7) RET 1 + + CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1); + CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0); + CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); + CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); + CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); + CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); + CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); + CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); + + RunTest(i0); +} + +TEST_F(ProgramTest, ConfusingTailsBasic) { + // Without the fix for https://crbug.com/351103/, (see + // SampleProgramConfusingTails()), this would generate a cyclic graph and + // crash as the two "LOAD 0" instructions would get merged. + // + // 0) LOAD 1 // ??? + // 1) if A == 0x1; then JMP 2 else JMP 3 + // 2) LOAD 0 // System call number + // 3) if A == 0x2; then JMP 4 else JMP 5 + // 4) LOAD 0 // System call number + // 5) RET 1 + + CodeGen::Node i5 = MakeInstruction(BPF_RET + BPF_K, 1); + CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); + CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); + CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); + CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); + CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); + + RunTest(i0); +} + +TEST_F(ProgramTest, ConfusingTailsMergeable) { + // This is similar to SampleProgramConfusingTails(), except that + // instructions 2 and 4 are now RET instructions. + // In PointerCompare(), this exercises the path where two blocks are of the + // same length and identical and the last instruction is a JMP or RET, so the + // following blocks don't need to be looked at and the blocks are mergeable. + // + // 0) LOAD 1 // ??? + // 1) if A == 0x1; then JMP 2 else JMP 3 + // 2) RET 42 + // 3) if A == 0x2; then JMP 4 else JMP 5 + // 4) RET 42 + // 5) if A == 0x1; then JMP 6 else JMP 7 + // 6) RET 0 + // 7) RET 1 + + CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1); + CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0); + CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); + CodeGen::Node i4 = MakeInstruction(BPF_RET + BPF_K, 42); + CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); + CodeGen::Node i2 = MakeInstruction(BPF_RET + BPF_K, 42); + CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); + CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); + + RunTest(i0); +} + +TEST_F(ProgramTest, InstructionFolding) { + // Check that simple instructions are folded as expected. + CodeGen::Node a = MakeInstruction(BPF_RET + BPF_K, 0); + EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0)); + CodeGen::Node b = MakeInstruction(BPF_RET + BPF_K, 1); + EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0)); + EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1)); + EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1)); + + // Check that complex sequences are folded too. + CodeGen::Node c = + MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, + MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b)); + EXPECT_EQ(c, MakeInstruction( + BPF_LD + BPF_W + BPF_ABS, 0, + MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b))); + + RunTest(c); +} + +TEST_F(ProgramTest, FarBranches) { + // BPF instructions use 8-bit fields for branch offsets, which means + // branch targets must be within 255 instructions of the branch + // instruction. CodeGen abstracts away this detail by inserting jump + // instructions as needed, which we test here by generating programs + // that should trigger any interesting boundary conditions. + + // Populate with 260 initial instruction nodes. + std::vector<CodeGen::Node> nodes; + nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0)); + for (size_t i = 1; i < 260; ++i) { + nodes.push_back( + MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back())); + } + + // Exhaustively test branch offsets near BPF's limits. + for (size_t jt = 250; jt < 260; ++jt) { + for (size_t jf = 250; jf < 260; ++jf) { + nodes.push_back(MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, + nodes.rbegin()[jt], nodes.rbegin()[jf])); + RunTest(nodes.back()); + } + } +} + +TEST_F(ProgramTest, JumpReuse) { + // As a code size optimization, we try to reuse jumps when possible + // instead of emitting new ones. Here we make sure that optimization + // is working as intended. + // + // NOTE: To simplify testing, we rely on implementation details + // about what CodeGen::Node values indicate (i.e., vector indices), + // but CodeGen users should treat them as opaque values. + + // Populate with 260 initial instruction nodes. + std::vector<CodeGen::Node> nodes; + nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0)); + for (size_t i = 1; i < 260; ++i) { + nodes.push_back( + MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back())); + } + + // Branching to nodes[0] and nodes[1] should require 3 new + // instructions: two far jumps plus the branch itself. + CodeGen::Node one = + MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, nodes[0], nodes[1]); + EXPECT_EQ(nodes.back() + 3, one); // XXX: Implementation detail! + RunTest(one); + + // Branching again to the same target nodes should require only one + // new instruction, as we can reuse the previous branch's jumps. + CodeGen::Node two = + MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, nodes[0], nodes[1]); + EXPECT_EQ(one + 1, two); // XXX: Implementation detail! + RunTest(two); +} + +} // namespace +} // namespace sandbox |