aboutsummaryrefslogtreecommitdiff
path: root/tools/generate_seccomp_policy.py
diff options
context:
space:
mode:
author: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2021-07-14 00:45:05 +0000
committer: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2021-07-14 00:45:05 +0000
commit: 1eb32343c195ac4920f4d37c751039be00838bb7 (patch)
tree: 832aaffc3276b8ebfc0960c3d4bc4138ad8a6490 /tools/generate_seccomp_policy.py
parent: dd66b18a9fab1707c59befaa544aaac713e4f5f8 (diff)
parent: 70e1cac63d9545c24b6667f99813e868f6c75c80 (diff)
download: minijail-1eb32343c195ac4920f4d37c751039be00838bb7.tar.gz
Change-Id: I56917a96b7cd67c0eebefdca3125ea4b393559e4
Diffstat (limited to 'tools/generate_seccomp_policy.py')
-rwxr-xr-x tools/generate_seccomp_policy.py 204
1 files changed, 191 insertions, 13 deletions
diff --git a/tools/generate_seccomp_policy.py b/tools/generate_seccomp_policy.py
index 2cfb611..3654123 100755
--- a/tools/generate_seccomp_policy.py
+++ b/tools/generate_seccomp_policy.py
@@ -18,15 +18,22 @@
# This script will take any number of trace files generated by strace(1)
# and output a system call filtering policy suitable for use with Minijail.
-"""Helper tool to generate a minijail seccomp filter from strace output."""
+"""Tool to generate a minijail seccomp filter from strace or audit output."""
from __future__ import print_function
import argparse
import collections
+import os
import re
import sys
+# auparse may not be installed and is currently optional.
+try:
+ import auparse
+except ImportError:
+ auparse = None
+
NOTICE = """# Copyright (C) 2018 The Android Open Source Project
#
@@ -55,23 +62,90 @@ SOCKETCALLS = {
'setsockopt', 'shutdown', 'socket', 'socketpair',
}
+# Map of private ARM syscall numbers to their names. These can be found in any
+# ARM specific unistd.h such as Linux's arch/arm/include/uapi/asm/unistd.h.
+PRIVATE_ARM_SYSCALLS = {
+ 983041: 'ARM_breakpoint',
+ 983042: 'ARM_cacheflush',
+ 983043: 'ARM_usr26',
+ 983044: 'ARM_usr32',
+ 983045: 'ARM_set_tls',
+}
+
ArgInspectionEntry = collections.namedtuple('ArgInspectionEntry',
('arg_index', 'value_set'))
+# pylint: disable=too-few-public-methods
+class BucketInputFiles(argparse.Action):
+    """Buckets input files using simple content based heuristics.
+
+    Each file is scanned line by line and is classified by the first line
+    that matches either the strace or the audit pattern. A file in which no
+    line matches is treated as an strace log.
+
+    The result is stored on the argparse namespace as two disjoint lists:
+      audit_logs: Filenames that look like audit logs.
+      traces: Filenames that look like (or default to) strace logs.
+    """
+    def __call__(self, parser, namespace, values, option_string=None):
+        """Classifies the filenames in values and stores both buckets.
+
+        Calls parser.error() for any filename that does not exist.
+        """
+        audit_logs = []
+        traces = []
+
+        strace_line_re = re.compile(r'[a-z]+[0-9]*\(.+\) += ')
+        audit_line_re = re.compile(r'type=(SYSCALL|SECCOMP)')
+
+        for filename in values:
+            if not os.path.exists(filename):
+                parser.error(f'Input file {filename} not found.')
+            with open(filename, mode='r', encoding='utf8') as input_file:
+                # Iterate lazily: the first matching line decides the bucket,
+                # so there is no need to load the whole log into memory.
+                for line in input_file:
+                    if strace_line_re.search(line):
+                        traces.append(filename)
+                        break
+                    if audit_line_re.search(line):
+                        audit_logs.append(filename)
+                        break
+                else:
+                    # Treat it as an strace log to retain legacy behaviour and
+                    # also just in case the strace regex is imperfect.
+                    traces.append(filename)
+
+        setattr(namespace, 'audit_logs', audit_logs)
+        setattr(namespace, 'traces', traces)
+# pylint: enable=too-few-public-methods
+
+
def parse_args(argv):
    """Returns the parsed CLI arguments for this tool."""
    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument('--frequency', nargs='?', type=argparse.FileType('w'),
+    parser.add_argument('--verbose', action='store_true',
+                        help='output informational messages to stderr')
+    parser.add_argument('--frequency', type=argparse.FileType('w'),
                        help='frequency file')
-    parser.add_argument('--policy', nargs='?', type=argparse.FileType('w'),
+    parser.add_argument('--policy', type=argparse.FileType('w'),
                        default=sys.stdout, help='policy file')
-    parser.add_argument('traces', nargs='+', help='The strace logs.')
-    return parser.parse_args(argv)
+    # BucketInputFiles classifies every positional file and stores the result
+    # on the namespace as opts.traces and opts.audit_logs.
+    parser.add_argument('input-logs', action=BucketInputFiles,
+                        help='strace and/or audit logs', nargs='+')
+    parser.add_argument('--audit-comm', type=str, metavar='PROCESS_NAME',
+                        help='relevant process name from the audit.log files')
+    opts = parser.parse_args(argv)
+
+    # auparse is an optional import; it is only required once an input file
+    # has actually been classified as an audit log.
+    if opts.audit_logs and not auparse:
+        parser.error('Python bindings for the audit subsystem were not found.\n'
+                     'Please install the python3-audit (sometimes python-audit)'
+                     ' package for your distro to process audit logs: '
+                     f'{opts.audit_logs}')
+
+    # Audit logs and --audit-comm must be given together.
+    if opts.audit_logs and not opts.audit_comm:
+        parser.error('--audit-comm is required when using audit logs as input:'
+                     f' {opts.audit_logs}')
+
+    if not opts.audit_logs and opts.audit_comm:
+        parser.error('--audit-comm was specified yet none of the input files '
+                     'matched our heuristic for an audit log')
+
+    return opts
def get_seccomp_bpf_filter(syscall, entry):
- """Return a minijail seccomp-bpf filter expression for the syscall."""
+ """Returns a minijail seccomp-bpf filter expression for the syscall."""
arg_index = entry.arg_index
arg_values = entry.value_set
atoms = []
@@ -87,8 +161,7 @@ def get_seccomp_bpf_filter(syscall, entry):
else:
atoms.extend(['arg2 in ~PROT_EXEC', 'arg2 in ~PROT_WRITE'])
arg_values = set()
- atoms.extend('arg%d == %s' % (arg_index, arg_value)
- for arg_value in arg_values)
+ atoms.extend(f'arg{arg_index} == {arg_value}' for arg_value in arg_values)
return ' || '.join(atoms)
@@ -98,7 +171,7 @@ def parse_trace_file(trace_filename, syscalls, arg_inspection):
('x86' in trace_filename and
'64' not in trace_filename))
- with open(trace_filename) as trace_file:
+ with open(trace_filename, encoding='utf8') as trace_file:
for line in trace_file:
matches = LINE_RE.match(line)
if not matches:
@@ -108,6 +181,13 @@ def parse_trace_file(trace_filename, syscalls, arg_inspection):
if uses_socketcall and syscall in SOCKETCALLS:
syscall = 'socketcall'
+ # strace omits the 'ARM_' prefix on all private ARM syscalls. Add
+ # it manually here as a workaround. These syscalls are exclusive
+ # to ARM so we don't need to predicate this on a trace_filename
+ # based heuristic for the arch.
+ if f'ARM_{syscall}' in PRIVATE_ARM_SYSCALLS.values():
+ syscall = f'ARM_{syscall}'
+
syscalls[syscall] += 1
args = [arg.strip() for arg in args.split(',')]
@@ -117,6 +197,92 @@ def parse_trace_file(trace_filename, syscalls, arg_inspection):
arg_inspection[syscall].value_set.add(arg_value)
+def parse_audit_log(audit_log, audit_comm, syscalls, arg_inspection):
+    """Parses one audit.log file generated by the Linux audit subsystem.
+
+    Args:
+      audit_log: Filename of the audit log to parse.
+      audit_comm: Process name; only SECCOMP/SYSCALL records whose comm field
+          matches it are counted.
+      syscalls: Mapping of syscall name to occurrence count. Updated in place.
+      arg_inspection: Mapping of syscall name to ArgInspectionEntry for the
+          syscalls whose argument values are collected. The value sets are
+          updated in place.
+
+    Raises:
+      ValueError: If the file does not parse as an audit log, or if a relevant
+          record lacks the syscall field or the inspected argument field.
+    """
+
+    unknown_syscall_re = re.compile(r'unknown-syscall\((?P<syscall_num>\d+)\)')
+
+    au = auparse.AuParser(auparse.AUSOURCE_FILE, audit_log)
+    # Quick validity check for whether this parses as a valid audit log. The
+    # first event should have at least one record.
+    if not au.first_record():
+        # audit_log is a filename string, so it is formatted directly.
+        raise ValueError(f'Unable to parse audit log file {audit_log}')
+
+    # Iterate through events where _any_ contained record matches
+    # ((type == SECCOMP || type == SYSCALL) && comm == audit_comm).
+    au.search_add_item('type', '=', 'SECCOMP', auparse.AUSEARCH_RULE_CLEAR)
+    au.search_add_item('type', '=', 'SYSCALL', auparse.AUSEARCH_RULE_OR)
+    au.search_add_item('comm', '=', f'"{audit_comm}"',
+                       auparse.AUSEARCH_RULE_AND)
+
+    # auparse_find_field(3) will ignore preceding fields in the record and
+    # at the same time happily cross record boundaries when looking for the
+    # field. This helper method always seeks the cursor back to the first
+    # field in the record and stops searching before crossing over to the
+    # next record; making the search far less error prone.
+    # Also implicitly seeks the internal 'cursor' to the matching field
+    # for any subsequent calls like auparse_interpret_field.
+    def _find_field_in_current_record(name):
+        au.first_field()
+        while True:
+            if au.get_field_name() == name:
+                return au.get_field_str()
+            if not au.next_field():
+                return None
+
+    while au.search_next_event():
+        # The event may have multiple records. Loop through all.
+        au.first_record()
+        for _ in range(au.get_num_records()):
+            event_type = _find_field_in_current_record('type')
+            comm = _find_field_in_current_record('comm')
+            # Some of the records in this event may not be relevant
+            # despite the event-specific search filter. Skip those.
+            if (event_type not in ('SECCOMP', 'SYSCALL') or
+                    comm != f'"{audit_comm}"'):
+                au.next_record()
+                continue
+
+            if not _find_field_in_current_record('syscall'):
+                raise ValueError(f'Could not find field "syscall" in event of '
+                                 f'type {event_type}')
+            # Interpret the syscall field that's under our 'cursor' following
+            # the find. Interpreting fields yields human friendly names
+            # instead of integers. E.g '16' -> 'ioctl'.
+            syscall = au.interpret_field()
+
+            # TODO(crbug/1172449): Add these syscalls to upstream
+            # audit-userspace and remove this workaround.
+            # This is redundant but safe for non-ARM architectures due to the
+            # disjoint set of private syscall numbers.
+            match = unknown_syscall_re.match(syscall)
+            if match:
+                syscall_num = int(match.group('syscall_num'))
+                syscall = PRIVATE_ARM_SYSCALLS.get(syscall_num, syscall)
+
+            if ((syscall in arg_inspection and event_type == 'SECCOMP') or
+                    (syscall not in arg_inspection and
+                     event_type == 'SYSCALL')):
+                # Skip SECCOMP records for syscalls that require argument
+                # inspection. Similarly, skip SYSCALL records for syscalls
+                # that do not require argument inspection. Technically such
+                # records wouldn't exist per our setup instructions but audit
+                # sometimes lets a few records slip through.
+                au.next_record()
+                continue
+
+            # Past the skip above, a SYSCALL record always belongs to a
+            # syscall that is present in arg_inspection.
+            if event_type == 'SYSCALL':
+                arg_field_name = f'a{arg_inspection[syscall].arg_index}'
+                if not _find_field_in_current_record(arg_field_name):
+                    raise ValueError(f'Could not find field "{arg_field_name}" '
+                                     f'in event of type {event_type}')
+                # Interpret the arg field that's under our 'cursor' following
+                # the find. This may yield a more human friendly name.
+                # E.g '5401' -> 'TCGETS'.
+                arg_inspection[syscall].value_set.add(au.interpret_field())
+
+            syscalls[syscall] += 1
+            au.next_record()
+
+
def main(argv=None):
"""Main entrypoint."""
@@ -136,9 +302,20 @@ def main(argv=None):
'mprotect': ArgInspectionEntry(2, set([])), # int prot
}
+ if opts.verbose:
+ # Print an informational message to stderr in case the filetype detection
+ # heuristics are wonky.
+ print('Generating a seccomp policy using these input files:',
+ file=sys.stderr)
+ print(f'Strace logs: {opts.traces}', file=sys.stderr)
+ print(f'Audit logs: {opts.audit_logs}', file=sys.stderr)
+
for trace_filename in opts.traces:
parse_trace_file(trace_filename, syscalls, arg_inspection)
+ for audit_log in opts.audit_logs:
+ parse_audit_log(audit_log, opts.audit_comm, syscalls, arg_inspection)
+
# Add the basic set if they are not yet present.
basic_set = [
'restart_syscall', 'exit', 'exit_group', 'rt_sigreturn',
@@ -168,13 +345,14 @@ def main(argv=None):
for syscall in sorted_syscalls:
if syscall in arg_inspection:
- arg_filter = get_seccomp_bpf_filter(syscall, arg_inspection[syscall])
+ arg_filter = get_seccomp_bpf_filter(syscall,
+ arg_inspection[syscall])
else:
arg_filter = ALLOW
- print('%s: %s' % (syscall, arg_filter), file=opts.policy)
+ print(f'{syscall}: {arg_filter}', file=opts.policy)
if opts.frequency is not None:
- print('%s: %s' % (syscall, syscalls[syscall]),
- file=opts.frequency)
+ print(f'{syscall}: {syscalls[syscall]}', file=opts.frequency)
+
if __name__ == '__main__':
sys.exit(main(sys.argv[1:]))