diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2021-07-14 00:45:05 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2021-07-14 00:45:05 +0000 |
commit | 1eb32343c195ac4920f4d37c751039be00838bb7 (patch) | |
tree | 832aaffc3276b8ebfc0960c3d4bc4138ad8a6490 /tools/generate_seccomp_policy.py | |
parent | dd66b18a9fab1707c59befaa544aaac713e4f5f8 (diff) | |
parent | 70e1cac63d9545c24b6667f99813e868f6c75c80 (diff) | |
download | minijail-1eb32343c195ac4920f4d37c751039be00838bb7.tar.gz |
Snap for 7547121 from 70e1cac63d9545c24b6667f99813e868f6c75c80 to mainline-permission-release (refs: android-mainline-12.0.0_r93, android-mainline-12.0.0_r79, android-mainline-12.0.0_r64, android-mainline-12.0.0_r51, android-mainline-12.0.0_r34, android-mainline-12.0.0_r15, android-mainline-12.0.0_r123, android-mainline-12.0.0_r107, android12-mainline-permission-release)
Change-Id: I56917a96b7cd67c0eebefdca3125ea4b393559e4
Diffstat (limited to 'tools/generate_seccomp_policy.py')
-rwxr-xr-x | tools/generate_seccomp_policy.py | 204 |
1 files changed, 191 insertions, 13 deletions
diff --git a/tools/generate_seccomp_policy.py b/tools/generate_seccomp_policy.py index 2cfb611..3654123 100755 --- a/tools/generate_seccomp_policy.py +++ b/tools/generate_seccomp_policy.py @@ -18,15 +18,22 @@ # This script will take any number of trace files generated by strace(1) # and output a system call filtering policy suitable for use with Minijail. -"""Helper tool to generate a minijail seccomp filter from strace output.""" +"""Tool to generate a minijail seccomp filter from strace or audit output.""" from __future__ import print_function import argparse import collections +import os import re import sys +# auparse may not be installed and is currently optional. +try: + import auparse +except ImportError: + auparse = None + NOTICE = """# Copyright (C) 2018 The Android Open Source Project # @@ -55,23 +62,90 @@ SOCKETCALLS = { 'setsockopt', 'shutdown', 'socket', 'socketpair', } +# List of private ARM syscalls. These can be found in any ARM specific unistd.h +# such as Linux's arch/arm/include/uapi/asm/unistd.h. +PRIVATE_ARM_SYSCALLS = { + 983041: 'ARM_breakpoint', + 983042: 'ARM_cacheflush', + 983043: 'ARM_usr26', + 983044: 'ARM_usr32', + 983045: 'ARM_set_tls', +} + ArgInspectionEntry = collections.namedtuple('ArgInspectionEntry', ('arg_index', 'value_set')) +# pylint: disable=too-few-public-methods +class BucketInputFiles(argparse.Action): + """Buckets input files using simple content based heuristics. + + Attributes: + audit_logs: Mutually exclusive list of audit log filenames. + traces: Mutually exclusive list of strace log filenames. 
+ """ + def __call__(self, parser, namespace, values, option_string=None): + audit_logs = [] + traces = [] + + strace_line_re = re.compile(r'[a-z]+[0-9]*\(.+\) += ') + audit_line_re = re.compile(r'type=(SYSCALL|SECCOMP)') + + for filename in values: + if not os.path.exists(filename): + parser.error(f'Input file {filename} not found.') + with open(filename, mode='r', encoding='utf8') as input_file: + for line in input_file.readlines(): + if strace_line_re.search(line): + traces.append(filename) + break + if audit_line_re.search(line): + audit_logs.append(filename) + break + else: + # Treat it as an strace log to retain legacy behaviour and + # also just in case the strace regex is imperfect. + traces.append(filename) + + setattr(namespace, 'audit_logs', audit_logs) + setattr(namespace, 'traces', traces) +# pylint: enable=too-few-public-methods + + def parse_args(argv): """Returns the parsed CLI arguments for this tool.""" parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument('--frequency', nargs='?', type=argparse.FileType('w'), + parser.add_argument('--verbose', action='store_true', + help='output informational messages to stderr') + parser.add_argument('--frequency', type=argparse.FileType('w'), help='frequency file') - parser.add_argument('--policy', nargs='?', type=argparse.FileType('w'), + parser.add_argument('--policy', type=argparse.FileType('w'), default=sys.stdout, help='policy file') - parser.add_argument('traces', nargs='+', help='The strace logs.') - return parser.parse_args(argv) + parser.add_argument('input-logs', action=BucketInputFiles, + help='strace and/or audit logs', nargs='+') + parser.add_argument('--audit-comm', type=str, metavar='PROCESS_NAME', + help='relevant process name from the audit.log files') + opts = parser.parse_args(argv) + + if opts.audit_logs and not auparse: + parser.error('Python bindings for the audit subsystem were not found.\n' + 'Please install the python3-audit (sometimes python-audit)' + ' package 
for your distro to process audit logs: ' + f'{opts.audit_logs}') + + if opts.audit_logs and not opts.audit_comm: + parser.error(f'--audit-comm is required when using audit logs as input:' + f' {opts.audit_logs}') + + if not opts.audit_logs and opts.audit_comm: + parser.error('--audit-comm was specified yet none of the input files ' + 'matched our hueristic for an audit log') + + return opts def get_seccomp_bpf_filter(syscall, entry): - """Return a minijail seccomp-bpf filter expression for the syscall.""" + """Returns a minijail seccomp-bpf filter expression for the syscall.""" arg_index = entry.arg_index arg_values = entry.value_set atoms = [] @@ -87,8 +161,7 @@ def get_seccomp_bpf_filter(syscall, entry): else: atoms.extend(['arg2 in ~PROT_EXEC', 'arg2 in ~PROT_WRITE']) arg_values = set() - atoms.extend('arg%d == %s' % (arg_index, arg_value) - for arg_value in arg_values) + atoms.extend(f'arg{arg_index} == {arg_value}' for arg_value in arg_values) return ' || '.join(atoms) @@ -98,7 +171,7 @@ def parse_trace_file(trace_filename, syscalls, arg_inspection): ('x86' in trace_filename and '64' not in trace_filename)) - with open(trace_filename) as trace_file: + with open(trace_filename, encoding='utf8') as trace_file: for line in trace_file: matches = LINE_RE.match(line) if not matches: @@ -108,6 +181,13 @@ def parse_trace_file(trace_filename, syscalls, arg_inspection): if uses_socketcall and syscall in SOCKETCALLS: syscall = 'socketcall' + # strace omits the 'ARM_' prefix on all private ARM syscalls. Add + # it manually here as a workaround. These syscalls are exclusive + # to ARM so we don't need to predicate this on a trace_filename + # based heuristic for the arch. 
+ if f'ARM_{syscall}' in PRIVATE_ARM_SYSCALLS.values(): + syscall = f'ARM_{syscall}' + syscalls[syscall] += 1 args = [arg.strip() for arg in args.split(',')] @@ -117,6 +197,92 @@ def parse_trace_file(trace_filename, syscalls, arg_inspection): arg_inspection[syscall].value_set.add(arg_value) +def parse_audit_log(audit_log, audit_comm, syscalls, arg_inspection): + """Parses one audit.log file generated by the Linux audit subsystem.""" + + unknown_syscall_re = re.compile(r'unknown-syscall\((?P<syscall_num>\d+)\)') + + au = auparse.AuParser(auparse.AUSOURCE_FILE, audit_log) + # Quick validity check for whether this parses as a valid audit log. The + # first event should have at least one record. + if not au.first_record(): + raise ValueError(f'Unable to parse audit log file {audit_log.name}') + + # Iterate through events where _any_ contained record matches + # ((type == SECCOMP || type == SYSCALL) && comm == audit_comm). + au.search_add_item('type', '=', 'SECCOMP', auparse.AUSEARCH_RULE_CLEAR) + au.search_add_item('type', '=', 'SYSCALL', auparse.AUSEARCH_RULE_OR) + au.search_add_item('comm', '=', f'"{audit_comm}"', + auparse.AUSEARCH_RULE_AND) + + # auparse_find_field(3) will ignore preceding fields in the record and + # at the same time happily cross record boundaries when looking for the + # field. This helper method always seeks the cursor back to the first + # field in the record and stops searching before crossing over to the + # next record; making the search far less error prone. + # Also implicitly seeks the internal 'cursor' to the matching field + # for any subsequent calls like auparse_interpret_field. + def _find_field_in_current_record(name): + au.first_field() + while True: + if au.get_field_name() == name: + return au.get_field_str() + if not au.next_field(): + return None + + while au.search_next_event(): + # The event may have multiple records. Loop through all. 
+ au.first_record() + for _ in range(au.get_num_records()): + event_type = _find_field_in_current_record('type') + comm = _find_field_in_current_record('comm') + # Some of the records in this event may not be relevant + # despite the event-specific search filter. Skip those. + if (event_type not in ('SECCOMP', 'SYSCALL') or + comm != f'"{audit_comm}"'): + au.next_record() + continue + + if not _find_field_in_current_record('syscall'): + raise ValueError(f'Could not find field "syscall" in event of ' + f'type {event_type}') + # Intepret the syscall field that's under our 'cursor' following the + # find. Interpreting fields yields human friendly names instead + # of integers. E.g '16' -> 'ioctl'. + syscall = au.interpret_field() + + # TODO(crbug/1172449): Add these syscalls to upstream + # audit-userspace and remove this workaround. + # This is redundant but safe for non-ARM architectures due to the + # disjoint set of private syscall numbers. + match = unknown_syscall_re.match(syscall) + if match: + syscall_num = int(match.group('syscall_num')) + syscall = PRIVATE_ARM_SYSCALLS.get(syscall_num, syscall) + + if ((syscall in arg_inspection and event_type == 'SECCOMP') or + (syscall not in arg_inspection and event_type == 'SYSCALL')): + # Skip SECCOMP records for syscalls that require argument + # inspection. Similarly, skip SYSCALL records for syscalls + # that do not require argument inspection. Technically such + # records wouldn't exist per our setup instructions but audit + # sometimes lets a few records slip through. + au.next_record() + continue + elif event_type == 'SYSCALL': + arg_field_name = f'a{arg_inspection[syscall].arg_index}' + if not _find_field_in_current_record(arg_field_name): + raise ValueError(f'Could not find field "{arg_field_name}"' + f'in event of type {event_type}') + # Intepret the arg field that's under our 'cursor' following the + # find. This may yield a more human friendly name. + # E.g '5401' -> 'TCGETS'. 
+ arg_inspection[syscall].value_set.add(au.interpret_field()) + + syscalls[syscall] += 1 + au.next_record() + + def main(argv=None): """Main entrypoint.""" @@ -136,9 +302,20 @@ def main(argv=None): 'mprotect': ArgInspectionEntry(2, set([])), # int prot } + if opts.verbose: + # Print an informational message to stderr in case the filetype detection + # heuristics are wonky. + print('Generating a seccomp policy using these input files:', + file=sys.stderr) + print(f'Strace logs: {opts.traces}', file=sys.stderr) + print(f'Audit logs: {opts.audit_logs}', file=sys.stderr) + for trace_filename in opts.traces: parse_trace_file(trace_filename, syscalls, arg_inspection) + for audit_log in opts.audit_logs: + parse_audit_log(audit_log, opts.audit_comm, syscalls, arg_inspection) + # Add the basic set if they are not yet present. basic_set = [ 'restart_syscall', 'exit', 'exit_group', 'rt_sigreturn', @@ -168,13 +345,14 @@ def main(argv=None): for syscall in sorted_syscalls: if syscall in arg_inspection: - arg_filter = get_seccomp_bpf_filter(syscall, arg_inspection[syscall]) + arg_filter = get_seccomp_bpf_filter(syscall, + arg_inspection[syscall]) else: arg_filter = ALLOW - print('%s: %s' % (syscall, arg_filter), file=opts.policy) + print(f'{syscall}: {arg_filter}', file=opts.policy) if opts.frequency is not None: - print('%s: %s' % (syscall, syscalls[syscall]), - file=opts.frequency) + print(f'{syscall}: {syscalls[syscall]}', file=opts.frequency) + if __name__ == '__main__': sys.exit(main(sys.argv[1:])) |