tools/compile_seccomp_policy: Add a new optimizing compiler am: 7a21ffee7c

am: 354d97ac4d Change-Id: Ic123f1132ab47b82881aa048cf6cea148c66ad41
author: Luis Hector Chavez <lhchavez@google.com> 2019-03-27 11:13:34 -0700
committer: android-build-merger <android-build-merger@google.com> 2019-03-27 11:13:34 -0700
commit: 4f2c4079a00fa154950640b1ca36606e3e278ac5 (patch)
tree: c008a2e83c4a644e4d33ec5de86a289e165c3335
parent: a995f4121f88f78eec5557cde53f6e8a400406b0 (diff)
parent: 354d97ac4d3e646b78410d3feb4ecc70ca8bc91f (diff)
download: minijail-4f2c4079a00fa154950640b1ca36606e3e278ac5.tar.gz
3 files changed, 335 insertions, 0 deletions
diff --git a/tools/compile_seccomp_policy.py b/tools/compile_seccomp_policy.py
new file mode 100755
index 0000000..62f8c1f
--- /dev/null
+++ b/tools/compile_seccomp_policy.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Helper tool to compile a BPF program from a Minijail seccomp filter.
+
+This script will take a Minijail seccomp policy file and compile it into a
+BPF program suitable for use with Minijail in the current architecture.
+"""
+
+from __future__ import print_function
+
+import argparse
+import sys
+
+import arch
+import bpf
+import compiler
+
+
+def parse_args(argv):
+    """Return the parsed CLI arguments for this tool."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        '--optimization-strategy',
+        default=compiler.OptimizationStrategy.BST,
+        type=compiler.OptimizationStrategy,
+        choices=list(compiler.OptimizationStrategy))
+    parser.add_argument('--include-depth-limit', default=10)
+    parser.add_argument('--arch-json', default='constants.json')
+    parser.add_argument(
+        '--use-kill-process',
+        action='store_true',
+        help=('Use SECCOMP_RET_KILL_PROCESS instead of '
+              'SECCOMP_RET_KILL_THREAD (requires Linux v4.14+).'))
+    parser.add_argument(
+        'policy', help='The seccomp policy.', type=argparse.FileType('r'))
+    parser.add_argument(
+        'output', help='The BPF program.', type=argparse.FileType('wb'))
+    return parser.parse_args(argv)
+
+
+def main(argv):
+    """Main entrypoint."""
+    opts = parse_args(argv)
+    policy_compiler = compiler.PolicyCompiler(
+        arch.Arch.load_from_json(opts.arch_json))
+    if opts.use_kill_process:
+        kill_action = bpf.KillProcess()
+    else:
+        kill_action = bpf.KillThread()
+    with opts.output as outf:
+        outf.write(
+            policy_compiler.compile_file(
+                opts.policy.name,
+                optimization_strategy=opts.optimization_strategy,
+                kill_action=kill_action,
+                include_depth_limit=opts.include_depth_limit).opcodes)
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv[1:]))
diff --git a/tools/compiler.py b/tools/compiler.py
index 96800f1..996b0da 100644
--- a/tools/compiler.py
+++ b/tools/compiler.py
@@ -18,10 +18,27 @@
 
 from __future__ import print_function
 
+import enum
+
 import bpf
 import parser  # pylint: disable=wrong-import-order
 
 
+class OptimizationStrategy(enum.Enum):
+    """The available optimization strategies."""
+
+    # Generate a linear chain of syscall number checks. Works best for policies
+    # with very few syscalls.
+    LINEAR = 'linear'
+
+    # Generate a binary search tree for the syscalls. Works best for policies
+    # with a lot of syscalls, where no one syscall dominates.
+    BST = 'bst'
+
+    def __str__(self):
+        return self.value
+
+
 class SyscallPolicyEntry:
     """The parsed version of a seccomp policy line."""
 
@@ -47,12 +64,168 @@ class SyscallPolicyEntry:
                             *args)
 
 
+def _compile_entries_linear(entries, accept_action, reject_action, visitor):
+    # Compiles the list of entries into a simple linear list of comparisons. In
+    # order to make the generated code a bit more efficient, we sort the
+    # entries by frequency, so that the most frequently-called syscalls appear
+    # earlier in the chain.
+    next_action = reject_action
+    entries.sort(key=lambda e: -e.frequency)
+    for entry in entries[::-1]:
+        if entry.filter:
+            next_action = bpf.SyscallEntry(entry.number, entry.filter,
+                                           next_action)
+            entry.filter.accept(visitor)
+        else:
+            next_action = bpf.SyscallEntry(entry.number, accept_action,
+                                           next_action)
+    return next_action
+
+
+def _compile_entries_bst(entries, accept_action, reject_action, visitor):
+    # Instead of generating a linear list of comparisons, this method generates
+    # a binary search tree.
+    #
+    # Even though we are going to perform a binary search over the syscall
+    # number, we would still like to rotate some of the internal nodes of the
+    # binary search tree so that more frequently-used syscalls can be accessed
+    # more cheaply (i.e. fewer internal nodes need to be traversed to reach
+    # them).
+    #
+    # The overall idea then is to, at any step, instead of naively partitioning
+    # the list of syscalls by the midpoint of the interval, we choose a
+    # midpoint that minimizes the difference of the sum of all frequencies
+    # between the left and right subtrees. For that, we need to sort the
+    # entries by syscall number and keep track of the accumulated frequency of
+    # all entries prior to the current one so that we can compue the midpoint
+    # efficiently.
+    #
+    # TODO(lhchavez): There is one further possible optimization, which is to
+    # hoist any syscalls that are more frequent than all other syscalls in the
+    # BST combined into a linear chain before entering the BST.
+    entries.sort(key=lambda e: e.number)
+    accumulated = 0
+    for entry in entries:
+        accumulated += entry.frequency
+        entry.accumulated = accumulated
+
+    # Recursively create the internal nodes.
+    def _generate_syscall_bst(entries, lower_bound=0, upper_bound=2**64 - 1):
+        assert entries
+        if len(entries) == 1:
+            # This is a single entry, but the interval we are currently
+            # considering contains other syscalls that we want to reject. So
+            # instead of an internal node, create a leaf node with an equality
+            # comparison.
+            assert lower_bound < upper_bound
+            if entries[0].filter:
+                entries[0].filter.accept(visitor)
+                return bpf.SyscallEntry(
+                    entries[0].number,
+                    entries[0].filter,
+                    reject_action,
+                    op=bpf.BPF_JEQ)
+            return bpf.SyscallEntry(
+                entries[0].number,
+                accept_action,
+                reject_action,
+                op=bpf.BPF_JEQ)
+
+        # Find the midpoint that minimizes the difference between accumulated
+        # costs in the left and right subtrees.
+        previous_accumulated = entries[0].accumulated - entries[0].frequency
+        last_accumulated = entries[-1].accumulated - previous_accumulated
+        best = (1e99, -1)
+        for i, entry in enumerate(entries):
+            if not i:
+                continue
+            left_accumulated = entry.accumulated - previous_accumulated
+            right_accumulated = last_accumulated - left_accumulated
+            best = min(best, (abs(left_accumulated - right_accumulated), i))
+        midpoint = best[1]
+        assert midpoint >= 1, best
+
+        # Now we build the right and left subtrees independently. If any of the
+        # subtrees consist of a single entry _and_ the bounds are tight around
+        # that entry (that is, the bounds contain _only_ the syscall we are
+        # going to consider), we can avoid emitting a leaf node and instead
+        # have the comparison jump directly into the action that would be taken
+        # by the entry.
+        if entries[midpoint].number == upper_bound:
+            if entries[midpoint].filter:
+                entries[midpoint].filter.accept(visitor)
+                right_subtree = entries[midpoint].filter
+            else:
+                right_subtree = accept_action
+        else:
+            right_subtree = _generate_syscall_bst(
+                entries[midpoint:], entries[midpoint].number, upper_bound)
+
+        if lower_bound == entries[midpoint].number - 1:
+            assert entries[midpoint - 1].number == lower_bound
+            if entries[midpoint - 1].filter:
+                entries[midpoint - 1].filter.accept(visitor)
+                left_subtree = entries[midpoint - 1].filter
+            else:
+                left_subtree = accept_action
+        else:
+            left_subtree = _generate_syscall_bst(
+                entries[:midpoint], lower_bound, entries[midpoint].number - 1)
+
+        # Finally, now that both subtrees have been generated, we can create
+        # the internal node of the binary search tree.
+        return bpf.SyscallEntry(
+            entries[midpoint].number,
+            right_subtree,
+            left_subtree,
+            op=bpf.BPF_JGE)
+
+    return _generate_syscall_bst(entries)
+
+
 class PolicyCompiler:
     """A parser for the Minijail seccomp policy file format."""
 
     def __init__(self, arch):
         self._arch = arch
 
+    def compile_file(self,
+                     policy_filename,
+                     *,
+                     optimization_strategy,
+                     kill_action,
+                     include_depth_limit=10):
+        """Return a compiled BPF program from the provided policy file."""
+        policy_parser = parser.PolicyParser(
+            self._arch,
+            kill_action=kill_action,
+            include_depth_limit=include_depth_limit)
+        parsed_policy = policy_parser.parse_file(policy_filename)
+        entries = [
+            self.compile_filter_statement(
+                filter_statement, kill_action=kill_action)
+            for filter_statement in parsed_policy.filter_statements
+        ]
+
+        visitor = bpf.FlatteningVisitor(
+            arch=self._arch, kill_action=kill_action)
+        accept_action = bpf.Allow()
+        reject_action = parsed_policy.default_action
+        if entries:
+            if optimization_strategy == OptimizationStrategy.BST:
+                next_action = _compile_entries_bst(entries, accept_action,
+                                                   reject_action, visitor)
+            else:
+                next_action = _compile_entries_linear(entries, accept_action,
+                                                      reject_action, visitor)
+            reject_action.accept(visitor)
+            accept_action.accept(visitor)
+            bpf.ValidateArch(next_action).accept(visitor)
+        else:
+            reject_action.accept(visitor)
+            bpf.ValidateArch(reject_action).accept(visitor)
+        return visitor.result
+
     def compile_filter_statement(self, filter_statement, *, kill_action):
         """Compile one parser.FilterStatement into BPF."""
         policy_entry = SyscallPolicyEntry(filter_statement.syscall.name,
diff --git a/tools/compiler_unittest.py b/tools/compiler_unittest.py
index ba66e62..2f0b0bd 100755
--- a/tools/compiler_unittest.py
+++ b/tools/compiler_unittest.py
@@ -272,5 +272,92 @@ class CompileFilterStatementTests(unittest.TestCase):
                     'ALLOW')
 
 
+class CompileFileTests(unittest.TestCase):
+    """Tests for PolicyCompiler.compile_file."""
+
+    def setUp(self):
+        self.arch = ARCH_64
+        self.compiler = compiler.PolicyCompiler(self.arch)
+        self.tempdir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self.tempdir)
+
+    def _write_file(self, filename, contents):
+        """Helper to write out a file for testing."""
+        path = os.path.join(self.tempdir, filename)
+        with open(path, 'w') as outf:
+            outf.write(contents)
+        return path
+
+    def test_compile_linear(self):
+        """Reject empty / malformed lines."""
+        self._write_file(
+            'test.frequency', """
+            read: 1
+            close: 10
+        """)
+        path = self._write_file(
+            'test.policy', """
+            @frequency ./test.frequency
+            read: 1
+            close: 1
+        """)
+
+        program = self.compiler.compile_file(
+            path,
+            optimization_strategy=compiler.OptimizationStrategy.LINEAR,
+            kill_action=bpf.KillProcess())
+        self.assertGreater(
+            bpf.simulate(program.instructions, self.arch.arch_nr,
+                         self.arch.syscalls['read'], 0)[0],
+            bpf.simulate(program.instructions, self.arch.arch_nr,
+                         self.arch.syscalls['close'], 0)[0],
+        )
+
+    def test_compile_bst(self):
+        """Ensure compilation with BST is cheaper than the linear model."""
+        self._write_file(
+            'test.frequency', """
+            read: 1
+            close: 10
+        """)
+        path = self._write_file(
+            'test.policy', """
+            @frequency ./test.frequency
+            read: 1
+            close: 1
+        """)
+
+        program = self.compiler.compile_file(
+            path,
+            optimization_strategy=compiler.OptimizationStrategy.BST,
+            kill_action=bpf.KillProcess())
+        # BST for very few syscalls does not make a lot of sense and does
+        # introduce some overhead, so there will be no checking for cost.
+        self.assertEqual(
+            bpf.simulate(program.instructions, self.arch.arch_nr,
+                         self.arch.syscalls['read'], 0)[1], 'ALLOW')
+        self.assertEqual(
+            bpf.simulate(program.instructions, self.arch.arch_nr,
+                         self.arch.syscalls['close'], 0)[1], 'ALLOW')
+
+    def test_compile_empty_file(self):
+        """Accept empty files."""
+        path = self._write_file(
+            'test.policy', """
+            @default kill-thread
+        """)
+
+        for strategy in list(compiler.OptimizationStrategy):
+            program = self.compiler.compile_file(
+                path,
+                optimization_strategy=strategy,
+                kill_action=bpf.KillProcess())
+            self.assertEqual(
+                bpf.simulate(program.instructions, self.arch.arch_nr,
+                             self.arch.syscalls['read'], 0)[1], 'KILL_THREAD')
+
+
 if __name__ == '__main__':
     unittest.main()
author	Luis Hector Chavez <lhchavez@google.com>	2019-03-27 11:13:34 -0700
committer	android-build-merger <android-build-merger@google.com>	2019-03-27 11:13:34 -0700
commit	4f2c4079a00fa154950640b1ca36606e3e278ac5 (patch)
tree	c008a2e83c4a644e4d33ec5de86a289e165c3335
parent	a995f4121f88f78eec5557cde53f6e8a400406b0 (diff)
parent	354d97ac4d3e646b78410d3feb4ecc70ca8bc91f (diff)
download	minijail-4f2c4079a00fa154950640b1ca36606e3e278ac5.tar.gz