diff options
Diffstat (limited to 'gki/compare_mixed_trees')
-rwxr-xr-x | gki/compare_mixed_trees | 168 |
1 files changed, 168 insertions, 0 deletions
#!/usr/bin/env python3
"""Compare the files used to compile a GKI kernel with those of a vendor tree.

The list of files is obtained by parsing the .cmd files that Kbuild leaves in
the GKI kernel build output folder.
"""

# Python version issues? Try using build-tools/path/linux-x86/python3

import argparse
import collections
import filecmp
import multiprocessing
import os
import pathlib
import re
import sys
import textwrap

# Matches "source_<target> := <file>" and captures <file>.
_SOURCE_RE = re.compile(r'.* := (?P<file>.*)\n?$')
# Matches "$(wildcard <file>)" and captures <file>.
_WILDCARD_RE = re.compile(r'\$\(wildcard (?P<file>[^\)]+)\)')

BuiltFilesResult = collections.namedtuple('BuiltFilesResult',
                                          ['files', 'src_dir'])


def parse_cmd_file(dotcmd, parse_deps=True):
  r"""Parse a .cmd file for the source files it used to build.

  The .cmd is a Makefile script generated by scripts/basic/fixdep.c and has
  following format:

    cmd_init/main.o := clang <......>

    source_init/main.o := <root_dir>/common/init/main.c

    deps_init/main.o := \
      $(wildcard include/config/INIT_ENV_ARG_LIMIT) \
      ...
      <root_dir>/include/common/include/linux/compiler-version.h \
      arch/arm64/include/generated/uapi/asm/sockios.h \
      ...
      <root_dir>/common/include/kunit/try-catch.h \

    init/main.o: $(deps_init/main.o)

    $(deps_init/main.o):

  We're interested in parsing source_ line to get the source file
  and all the items in deps_. For the sake of not missing anything, we pull out
  all the paths inside $(wildcard <path>) and non-absolute paths, which mostly
  appear to be generated in the output folder. The caller checks whether they
  really exist and drops them if not.

  Args:
    dotcmd: pathlib.Path of the .cmd file to parse.
    parse_deps: When False, only the source_ line is looked at and the
      returned dependency set is empty.

  Returns:
    A 2-tuple (source, deps): the normalized source path (None when the file
    has no source_ line) and the set of normalized dependency paths.
    NOTE: for compatibility the tuple is a BuiltFilesResult, so its field
    names (files, src_dir) do not describe these values; unpack it
    positionally.
  """
  deps = set()
  source = None
  in_deps = False

  with dotcmd.open() as f:
    for line in f:
      if line.startswith('source_'):
        m = _SOURCE_RE.fullmatch(line)
        if m:
          source = os.path.normpath(m.group('file'))

      if not parse_deps:
        continue

      if in_deps:
        fields = line.split()
        m = _WILDCARD_RE.search(line)
        if m:
          deps.add(os.path.normpath(m.group('file')))
        elif fields:
          deps.add(os.path.normpath(fields[0]))

        # The dependency list ends at the first line that is not continued
        # with a trailing backslash.
        if not fields or fields[-1] != '\\':
          in_deps = False

      # Checked after the in_deps handling so that the "deps_... := \" line
      # itself is not treated as a dependency.
      if line.startswith('deps_'):
        in_deps = True

  return BuiltFilesResult(source, deps)


def find_source_dir(dir):
  """Guess the source directory for a build output folder.

  The list of files we are interested in should be relative to the kernel
  source directory. Dependencies listed by the .cmd files are a mix of
  absolute and relative paths.

  init/main.c has been part of kernel since 2.6.12-rc2. It appears unlikely to
  go away and thus is a good candidate to make assumptions about its existence
  in the build output and source trees.

  Args:
    dir: pathlib.Path of the kernel build output folder.

  Returns:
    pathlib.Path of the directory two levels above the source file recorded
    in <dir>/init/.main.o.cmd.

  Exits the process with status 1 if that .cmd file cannot be read or does
  not name a source file.
  """
  main_cmd = dir / 'init' / '.main.o.cmd'
  try:
    main_c, _ = parse_cmd_file(main_cmd, parse_deps=False)
  except OSError as e:
    print(f'ERROR! Failed to read {main_cmd}: {e}', file=sys.stderr)
    sys.exit(1)

  if not main_c:
    print(f'ERROR! Failed to extract GKI kernel directory from {main_cmd}',
          file=sys.stderr)
    sys.exit(1)

  # Remove "init/main.c" to leave us with GKI_KERNEL_DIR
  return pathlib.Path(main_c).parent.parent


def _strip_source_prefix(results, src_dir_prefix):
  """Merge (source, deps) results into one set of paths without the prefix."""
  files = set()
  for source, deps in results:
    if source:
      files.add(source.removeprefix(src_dir_prefix))
    files.update(dep.removeprefix(src_dir_prefix) for dep in deps)
  return files


def extract_built_files(dir, jobs=None):
  """Extract source files and their dependencies from a build directory

  Args:
    dir: Kernel build output folder (e.g. out/android-mainline/common)
    jobs: Number of worker processes used to parse the .cmd files. None lets
      multiprocessing pick its default; 1 parses serially in this process.

  Returns:
    A tuple. The first value is the set of found files used in the build.
    The second value is the location of the source directory.
  """
  src_dir = find_source_dir(dir)
  # Directory prefix including the trailing separator. This must be the path
  # component separator (os.sep), not os.pathsep which separates the entries
  # of a search path such as $PATH.
  src_dir_prefix = os.path.join(src_dir, '')
  cmd_files = dir.glob('**/.*.cmd')

  if jobs == 1:
    files = _strip_source_prefix(map(parse_cmd_file, cmd_files),
                                 src_dir_prefix)
  else:
    with multiprocessing.Pool(jobs) as p:
      files = _strip_source_prefix(p.map(parse_cmd_file, cmd_files),
                                   src_dir_prefix)

  # Drop anything that does not exist below the source directory (mostly
  # files generated in the output folder). Paths that are still absolute at
  # this point make "src_dir / f" evaluate to the absolute path itself, so
  # they are kept whenever they exist.
  return BuiltFilesResult({f for f in files if os.path.exists(src_dir / f)},
                          src_dir)


def main():
  """Parse the command line, list the GKI files and diff the vendor tree.

  Returns:
    The process exit status: 2 if a vendor tree was given and at least one
    file differs or could not be compared, 0 otherwise.
  """
  parser = argparse.ArgumentParser(
      description='Compare the files used to compile a GKI kernel with those on a'
      ' device kernel',
      formatter_class=argparse.RawDescriptionHelpFormatter,
      epilog=textwrap.dedent('''
        Comparison of the trees is achieved by parsing the build output of the GKI kernel tree (GKI_KERNEL_OUT_DIR). The .cmd
        files list the file dependencies and give a good idea of which files are used when compiling the GKI kernel.

        Limitations:
          - Vendor kernel should have GKI kernel baseline merged into its tree for accurate diff reporting
          - Does not compare files which *would* be compiled into a vmlinux in a vendor kernel build.
            For instance, if vendor kernel has added obj-y += vendor_file.o, script would not detect such addition.
            Similarly, implicit Kconfig "select FOO" from a vendor module is not detected. The assumption
            is that such differences would be caught during other build steps or during test.
          - All .cmd from GKI_KERNEL_OUT_DIR are scanned. If extra build steps are run (e.g. menuconfig) or build is
            old, then extra files might be added to the list of GKI source files.
        '''))
  parser.add_argument('gki_out_dir', metavar='GKI_KERNEL_OUT_DIR',
                      type=pathlib.Path,
                      help='Location of the GKI kernel output folder.')
  parser.add_argument('--gki-files', type=pathlib.Path,
                      help='Location to write GKI files list to.')
  parser.add_argument('vendor_tree', metavar='VENDOR_TREE', nargs='?',
                      type=pathlib.Path,
                      help='Location of the vendor source tree. '
                      'If not provided,then just the list of GKI kernel '
                      'output files is generated.')
  parser.add_argument('--changed-files', type=pathlib.Path,
                      help='Location to write changed files list to.')
  parser.add_argument('-j', '--jobs', type=int, default=None,
                      help='Number of processes used to parse .cmd files '
                      '(default: chosen by multiprocessing).')

  args = parser.parse_args()

  built_files, source_dir = extract_built_files(args.gki_out_dir,
                                                jobs=args.jobs)
  # Sorted so that the printed and written lists are stable between runs.
  built_files = sorted(built_files)
  print(f'There are {len(built_files)} source files contributing to the build '
        f'in {args.gki_out_dir}.')
  if args.gki_files:
    with args.gki_files.open(mode='w') as f:
      f.writelines(f'{name}\n' for name in built_files)

  if not args.vendor_tree:
    return 0

  # "errors" holds the files cmpfiles could not compare (for example because
  # they are missing from one of the trees); count them as changed as well.
  _match, mismatch, errors = filecmp.cmpfiles(source_dir, args.vendor_tree,
                                              built_files)
  diff_files = sorted({*mismatch, *errors})
  print(f'There are {len(diff_files)} source file(s) changed in vendor tree.')
  if diff_files:
    print('\n'.join(f'  {name}' for name in diff_files))
  if args.changed_files:
    with args.changed_files.open(mode='w') as f:
      f.writelines(f'{name}\n' for name in diff_files)

  return 2 if diff_files else 0


if __name__ == '__main__':
  sys.exit(main())