Snap for 7547121 from 70e1cac63d9545c24b6667f99813e868f6c75c80 to mainline-permission-release android-mainline-12.0.0_r93 android-mainline-12.0.0_r79 android-mainline-12.0.0_r64 android-mainline-12.0.0_r51 android-mainline-12.0.0_r34 android-mainline-12.0.0_r15 android-mainline-12.0.0_r123 android-mainline-12.0.0_r107 android12-mainline-permission-release

Change-Id: I56917a96b7cd67c0eebefdca3125ea4b393559e4
author: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2021-07-14 00:45:05 +0000
committer: Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2021-07-14 00:45:05 +0000
commit: 1eb32343c195ac4920f4d37c751039be00838bb7 (patch)
tree: 832aaffc3276b8ebfc0960c3d4bc4138ad8a6490 /tools/generate_seccomp_policy.py
parent: dd66b18a9fab1707c59befaa544aaac713e4f5f8 (diff)
parent: 70e1cac63d9545c24b6667f99813e868f6c75c80 (diff)
download: minijail-1eb32343c195ac4920f4d37c751039be00838bb7.tar.gz
1 files changed, 191 insertions, 13 deletions
diff --git a/tools/generate_seccomp_policy.py b/tools/generate_seccomp_policy.py
index 2cfb611..3654123 100755
--- a/tools/generate_seccomp_policy.py
+++ b/tools/generate_seccomp_policy.py
@@ -18,15 +18,22 @@
 # This script will take any number of trace files generated by strace(1)
 # and output a system call filtering policy suitable for use with Minijail.
 
-"""Helper tool to generate a minijail seccomp filter from strace output."""
+"""Tool to generate a minijail seccomp filter from strace or audit output."""
 
 from __future__ import print_function
 
 import argparse
 import collections
+import os
 import re
 import sys
 
+# auparse may not be installed and is currently optional.
+try:
+    import auparse
+except ImportError:
+    auparse = None
+
 
 NOTICE = """# Copyright (C) 2018 The Android Open Source Project
 #
@@ -55,23 +62,90 @@ SOCKETCALLS = {
     'setsockopt', 'shutdown', 'socket', 'socketpair',
 }
 
+# List of private ARM syscalls. These can be found in any ARM specific unistd.h
+# such as Linux's arch/arm/include/uapi/asm/unistd.h.
+PRIVATE_ARM_SYSCALLS = {
+    983041: 'ARM_breakpoint',
+    983042: 'ARM_cacheflush',
+    983043: 'ARM_usr26',
+    983044: 'ARM_usr32',
+    983045: 'ARM_set_tls',
+}
+
 ArgInspectionEntry = collections.namedtuple('ArgInspectionEntry',
                                             ('arg_index', 'value_set'))
 
 
+# pylint: disable=too-few-public-methods
+class BucketInputFiles(argparse.Action):
+    """Buckets input files using simple content based heuristics.
+
+    Attributes:
+      audit_logs: Mutually exclusive list of audit log filenames.
+      traces: Mutually exclusive list of strace log filenames.
+    """
+    def __call__(self, parser, namespace, values, option_string=None):
+        audit_logs = []
+        traces = []
+
+        strace_line_re = re.compile(r'[a-z]+[0-9]*\(.+\) += ')
+        audit_line_re = re.compile(r'type=(SYSCALL|SECCOMP)')
+
+        for filename in values:
+            if not os.path.exists(filename):
+                parser.error(f'Input file {filename} not found.')
+            with open(filename, mode='r', encoding='utf8') as input_file:
+                for line in input_file.readlines():
+                    if strace_line_re.search(line):
+                        traces.append(filename)
+                        break
+                    if audit_line_re.search(line):
+                        audit_logs.append(filename)
+                        break
+                else:
+                    # Treat it as an strace log to retain legacy behaviour and
+                    # also just in case the strace regex is imperfect.
+                    traces.append(filename)
+
+        setattr(namespace, 'audit_logs', audit_logs)
+        setattr(namespace, 'traces', traces)
+# pylint: enable=too-few-public-methods
+
+
 def parse_args(argv):
     """Returns the parsed CLI arguments for this tool."""
     parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument('--frequency', nargs='?', type=argparse.FileType('w'),
+    parser.add_argument('--verbose', action='store_true',
+                        help='output informational messages to stderr')
+    parser.add_argument('--frequency', type=argparse.FileType('w'),
                         help='frequency file')
-    parser.add_argument('--policy', nargs='?', type=argparse.FileType('w'),
+    parser.add_argument('--policy', type=argparse.FileType('w'),
                         default=sys.stdout, help='policy file')
-    parser.add_argument('traces', nargs='+', help='The strace logs.')
-    return parser.parse_args(argv)
+    parser.add_argument('input-logs', action=BucketInputFiles,
+                        help='strace and/or audit logs', nargs='+')
+    parser.add_argument('--audit-comm', type=str, metavar='PROCESS_NAME',
+                        help='relevant process name from the audit.log files')
+    opts = parser.parse_args(argv)
+
+    if opts.audit_logs and not auparse:
+        parser.error('Python bindings for the audit subsystem were not found.\n'
+                     'Please install the python3-audit (sometimes python-audit)'
+                     ' package for your distro to process audit logs: '
+                     f'{opts.audit_logs}')
+
+    if opts.audit_logs and not opts.audit_comm:
+        parser.error(f'--audit-comm is required when using audit logs as input:'
+                     f' {opts.audit_logs}')
+
+    if not opts.audit_logs and opts.audit_comm:
+        parser.error('--audit-comm was specified yet none of the input files '
+                     'matched our hueristic for an audit log')
+
+    return opts
 
 
 def get_seccomp_bpf_filter(syscall, entry):
-    """Return a minijail seccomp-bpf filter expression for the syscall."""
+    """Returns a minijail seccomp-bpf filter expression for the syscall."""
     arg_index = entry.arg_index
     arg_values = entry.value_set
     atoms = []
@@ -87,8 +161,7 @@ def get_seccomp_bpf_filter(syscall, entry):
         else:
             atoms.extend(['arg2 in ~PROT_EXEC', 'arg2 in ~PROT_WRITE'])
             arg_values = set()
-    atoms.extend('arg%d == %s' % (arg_index, arg_value)
-                 for arg_value in arg_values)
+    atoms.extend(f'arg{arg_index} == {arg_value}' for arg_value in arg_values)
     return ' || '.join(atoms)
 
 
@@ -98,7 +171,7 @@ def parse_trace_file(trace_filename, syscalls, arg_inspection):
                        ('x86' in trace_filename and
                         '64' not in trace_filename))
 
-    with open(trace_filename) as trace_file:
+    with open(trace_filename, encoding='utf8') as trace_file:
         for line in trace_file:
             matches = LINE_RE.match(line)
             if not matches:
@@ -108,6 +181,13 @@ def parse_trace_file(trace_filename, syscalls, arg_inspection):
             if uses_socketcall and syscall in SOCKETCALLS:
                 syscall = 'socketcall'
 
+            # strace omits the 'ARM_' prefix on all private ARM syscalls. Add
+            # it manually here as a workaround. These syscalls are exclusive
+            # to ARM so we don't need to predicate this on a trace_filename
+            # based heuristic for the arch.
+            if f'ARM_{syscall}' in PRIVATE_ARM_SYSCALLS.values():
+                syscall = f'ARM_{syscall}'
+
             syscalls[syscall] += 1
 
             args = [arg.strip() for arg in args.split(',')]
@@ -117,6 +197,92 @@ def parse_trace_file(trace_filename, syscalls, arg_inspection):
                 arg_inspection[syscall].value_set.add(arg_value)
 
 
+def parse_audit_log(audit_log, audit_comm, syscalls, arg_inspection):
+    """Parses one audit.log file generated by the Linux audit subsystem."""
+
+    unknown_syscall_re = re.compile(r'unknown-syscall\((?P<syscall_num>\d+)\)')
+
+    au = auparse.AuParser(auparse.AUSOURCE_FILE, audit_log)
+    # Quick validity check for whether this parses as a valid audit log. The
+    # first event should have at least one record.
+    if not au.first_record():
+        raise ValueError(f'Unable to parse audit log file {audit_log.name}')
+
+    # Iterate through events where _any_ contained record matches
+    # ((type == SECCOMP || type == SYSCALL) && comm == audit_comm).
+    au.search_add_item('type', '=', 'SECCOMP', auparse.AUSEARCH_RULE_CLEAR)
+    au.search_add_item('type', '=', 'SYSCALL', auparse.AUSEARCH_RULE_OR)
+    au.search_add_item('comm', '=', f'"{audit_comm}"',
+                       auparse.AUSEARCH_RULE_AND)
+
+    # auparse_find_field(3) will ignore preceding fields in the record and
+    # at the same time happily cross record boundaries when looking for the
+    # field. This helper method always seeks the cursor back to the first
+    # field in the record and stops searching before crossing over to the
+    # next record; making the search far less error prone.
+    # Also implicitly seeks the internal 'cursor' to the matching field
+    # for any subsequent calls like auparse_interpret_field.
+    def _find_field_in_current_record(name):
+        au.first_field()
+        while True:
+            if au.get_field_name() == name:
+                return au.get_field_str()
+            if not au.next_field():
+                return None
+
+    while au.search_next_event():
+        # The event may have multiple records. Loop through all.
+        au.first_record()
+        for _ in range(au.get_num_records()):
+            event_type = _find_field_in_current_record('type')
+            comm = _find_field_in_current_record('comm')
+            # Some of the records in this event may not be relevant
+            # despite the event-specific search filter. Skip those.
+            if (event_type not in ('SECCOMP', 'SYSCALL') or
+                    comm != f'"{audit_comm}"'):
+                au.next_record()
+                continue
+
+            if not _find_field_in_current_record('syscall'):
+                raise ValueError(f'Could not find field "syscall" in event of '
+                                 f'type {event_type}')
+            # Interpret the syscall field that's under our 'cursor' following the
+            # find. Interpreting fields yields human friendly names instead
+            # of integers. E.g '16' -> 'ioctl'.
+            syscall = au.interpret_field()
+
+            # TODO(crbug/1172449): Add these syscalls to upstream
+            # audit-userspace and remove this workaround.
+            # This is redundant but safe for non-ARM architectures due to the
+            # disjoint set of private syscall numbers.
+            match = unknown_syscall_re.match(syscall)
+            if match:
+                syscall_num = int(match.group('syscall_num'))
+                syscall = PRIVATE_ARM_SYSCALLS.get(syscall_num, syscall)
+
+            if ((syscall in arg_inspection and event_type == 'SECCOMP') or
+                (syscall not in arg_inspection and event_type == 'SYSCALL')):
+                # Skip SECCOMP records for syscalls that require argument
+                # inspection. Similarly, skip SYSCALL records for syscalls
+                # that do not require argument inspection. Technically such
+                # records wouldn't exist per our setup instructions but audit
+                # sometimes lets a few records slip through.
+                au.next_record()
+                continue
+            elif event_type == 'SYSCALL':
+                arg_field_name = f'a{arg_inspection[syscall].arg_index}'
+                if not _find_field_in_current_record(arg_field_name):
+                    raise ValueError(f'Could not find field "{arg_field_name}"'
+                                     f'in event of type {event_type}')
+                # Interpret the arg field that's under our 'cursor' following the
+                # find. This may yield a more human friendly name.
+                # E.g '5401' -> 'TCGETS'.
+                arg_inspection[syscall].value_set.add(au.interpret_field())
+
+            syscalls[syscall] += 1
+            au.next_record()
+
+
 def main(argv=None):
     """Main entrypoint."""
 
@@ -136,9 +302,20 @@ def main(argv=None):
         'mprotect': ArgInspectionEntry(2, set([])), # int prot
     }
 
+    if opts.verbose:
+        # Print an informational message to stderr in case the filetype detection
+        # heuristics are wonky.
+        print('Generating a seccomp policy using these input files:',
+              file=sys.stderr)
+        print(f'Strace logs: {opts.traces}', file=sys.stderr)
+        print(f'Audit logs: {opts.audit_logs}', file=sys.stderr)
+
     for trace_filename in opts.traces:
         parse_trace_file(trace_filename, syscalls, arg_inspection)
 
+    for audit_log in opts.audit_logs:
+        parse_audit_log(audit_log, opts.audit_comm, syscalls, arg_inspection)
+
     # Add the basic set if they are not yet present.
     basic_set = [
         'restart_syscall', 'exit', 'exit_group', 'rt_sigreturn',
@@ -168,13 +345,14 @@ def main(argv=None):
 
     for syscall in sorted_syscalls:
         if syscall in arg_inspection:
-            arg_filter = get_seccomp_bpf_filter(syscall, arg_inspection[syscall])
+            arg_filter = get_seccomp_bpf_filter(syscall,
+                                                arg_inspection[syscall])
         else:
             arg_filter = ALLOW
-        print('%s: %s' % (syscall, arg_filter), file=opts.policy)
+        print(f'{syscall}: {arg_filter}', file=opts.policy)
         if opts.frequency is not None:
-            print('%s: %s' % (syscall, syscalls[syscall]),
-                  file=opts.frequency)
+            print(f'{syscall}: {syscalls[syscall]}', file=opts.frequency)
+
 
 if __name__ == '__main__':
     sys.exit(main(sys.argv[1:]))
author	Android Build Coastguard Worker <android-build-coastguard-worker@google.com>	2021-07-14 00:45:05 +0000
committer	Android Build Coastguard Worker <android-build-coastguard-worker@google.com>	2021-07-14 00:45:05 +0000
commit	1eb32343c195ac4920f4d37c751039be00838bb7 (patch)
tree	832aaffc3276b8ebfc0960c3d4bc4138ad8a6490 /tools/generate_seccomp_policy.py
parent	dd66b18a9fab1707c59befaa544aaac713e4f5f8 (diff)
parent	70e1cac63d9545c24b6667f99813e868f6c75c80 (diff)
download	minijail-1eb32343c195ac4920f4d37c751039be00838bb7.tar.gz