diff options
authorElliot Berman <>2022-01-11 16:18:26 -0800
committerTreehugger Robot <>2022-03-01 22:07:21 +0000
commitc93827a5267ca6a5bb7cb926f2173d141d8f2dbc (patch)
parentc726f094baec729fe2f4929408d3d8fe861bc5cc (diff)
compare_mixed_trees: Add script to compare mixed tree sources
In a mixed build environment, it is important to keep the files used to compile the GKI kernel the same between the GKI kernel and the device kernel. Otherwise, unexpected issues may arise. For instance, a change made to a core kernel file on the device kernel would not have any effect on the generated boot.img, since that file isn't used to compile vmlinux; the one from the GKI kernel tree is. compare_mixed_trees is a host-side Python script to compare the mixed build source trees. After compiling a mixed-build workspace, compare_mixed_trees can be run. This script will determine all the files used to compile the GKI kernel and check whether any differ on the device kernel. The "core kernel files" list is generated by inspecting all the .cmd files that Kbuild generates. These .cmd files contain the list of source files compiled and the files they depend on. Bug: 203187541 Change-Id: I7ee5646c77f95aae732eec81177801a997f9c676
1 files changed, 168 insertions, 0 deletions
diff --git a/gki/compare_mixed_trees b/gki/compare_mixed_trees
new file mode 100755
index 00000000..713a58a9
--- /dev/null
+++ b/gki/compare_mixed_trees
@@ -0,0 +1,168 @@
+#!/usr/bin/env python3
+# Python version issues? Try using build-tools/path/linux-x86/python3
+import argparse
+import collections
+import filecmp
+import multiprocessing
+import os
+import pathlib
+import re
+import sys
+import textwrap
# Matches a "source_<target> := <path>" line; the named group is the path.
_SOURCE_RE = re.compile(r'.* := (?P<file>.*)\n?$')
# Matches "$(wildcard <path>)"; the named group is the path inside it.
_WILDCARD_RE = re.compile(r'\$\(wildcard (?P<file>[^\)]+)\)')

# Generic 2-tuple result type. extract_built_files() fills it as
# (files, src_dir); parse_cmd_file() reuses it positionally as (source, deps).
BuiltFilesResult = collections.namedtuple('BuiltFilesResult',
                                          ['files', 'src_dir'])


def parse_cmd_file(dotcmd, parse_deps=True):
    r"""Parse a .cmd file for the source files it used to build.

    The .cmd is a Makefile script generated by scripts/basic/fixdep.c and has
    the following format:

        cmd_init/main.o := clang <......>
        source_init/main.o := <root_dir>/common/init/main.c
        deps_init/main.o := \
          $(wildcard include/config/INIT_ENV_ARG_LIMIT) \
          ...
          <root_dir>/include/common/include/linux/compiler-version.h \
          arch/arm64/include/generated/uapi/asm/sockios.h \
          ...
          <root_dir>/common/include/kunit/try-catch.h \

        init/main.o: $(deps_init/main.o)
        $(deps_init/main.o):

    We're interested in parsing the source_ line to get the source file and
    all the items in deps_. For the sake of not missing anything, we pull out
    all the paths inside $(wildcard <path>) and non-absolute paths, which
    mostly appear to be generated in the output folder. The caller is expected
    to check whether they really exist and drop them if not.

    Args:
        dotcmd: Path of the .cmd file to read (anything open() accepts).
        parse_deps: When False, only the source_ line is extracted and the
            returned dependency set is empty.

    Returns:
        A BuiltFilesResult used positionally as ``(source, deps)``: the
        normalized source path (None if no source_ line matched) and the set
        of normalized dependency paths. Note that the tuple's field names
        (``files``, ``src_dir``) do not describe these values; unpack it
        instead of using attribute access.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    deps = set()
    source = None
    in_deps = False
    with open(dotcmd, 'r') as f:
        for line in f:
            if line.startswith('source_'):
                m = _SOURCE_RE.fullmatch(line)
                if m:
                    source = os.path.normpath(m.group('file'))
            if not parse_deps:
                continue
            if in_deps:
                fields = line.split()
                m = _WILDCARD_RE.search(line)
                if m:
                    deps.add(os.path.normpath(m.group('file')))
                elif fields:
                    deps.add(os.path.normpath(fields[0]))
                # The dependency list ends at the first line that is not
                # continued with a trailing backslash.
                if not fields or fields[-1] != '\\':
                    in_deps = False
            # Checked after the in_deps handling so that the "deps_... := \"
            # header line itself is never recorded as a dependency.
            if line.startswith('deps_'):
                in_deps = True
    return BuiltFilesResult(source, deps)
def find_source_dir(dir):
    """Guess the source directory for a build output folder.

    The list of files we are interested in should be relative to the kernel
    source directory. Dependencies listed by the .cmd files are a mix of
    absolute and relative paths.

    init/main.c has been part of the kernel since 2.6.12-rc2. It appears
    unlikely to go away and thus is a good candidate to make assumptions about
    its existence in the build output and source trees.

    Args:
        dir: pathlib.Path of the kernel build output folder. (The name shadows
            the builtin but is kept for interface compatibility.)

    Returns:
        A pathlib.Path: the directory two levels above the source file
        recorded in ``<dir>/init/.main.o.cmd``.

    Exits the process with status 1 if the .cmd file cannot be read or does
    not name a source file.
    """
    main_cmd = dir / 'init' / '.main.o.cmd'
    try:
        main_c, _ = parse_cmd_file(main_cmd, parse_deps=False)
    except OSError:
        # A missing/unreadable .cmd file (e.g. the kernel was never built in
        # this folder) gets the same friendly error as an unparsable one.
        main_c = None
    if not main_c:
        print(f'ERROR! Failed to extract GKI kernel directory from {main_cmd}',
              file=sys.stderr)
        sys.exit(1)
    # Remove "init/main.c" to leave us with GKI_KERNEL_DIR
    return pathlib.Path(main_c).parent.parent
def extract_built_files(dir):
    """Extract source files and their dependencies from a build directory.

    Every ``.*.cmd`` file found (recursively) under the build directory is
    parsed in a multiprocessing pool. Paths located under the detected source
    directory are made relative to it, and only entries that exist relative to
    the source directory are kept.

    Args:
        dir: Kernel build output folder (e.g. out/android-mainline/common) as
            a pathlib.Path.

    Returns:
        A BuiltFilesResult tuple. The first value is the set of found files
        used in the build. The second value is the location of the source
        directory.
    """
    files = set()
    src_dir = find_source_dir(dir)
    # os.sep (the directory separator), not os.pathsep (the PATH-list
    # separator, ':' on POSIX): with the latter the prefix never matched and
    # absolute paths were left in the result.
    src_dir_prefix = str(src_dir) + os.sep
    with multiprocessing.Pool() as p:
        # Results are accumulated into a set, so completion order is
        # irrelevant and the unordered variant can be used.
        cmd_files = dir.glob('**/.*.cmd')
        for source, deps in p.imap_unordered(parse_cmd_file, cmd_files):
            if source:
                files.add(source.removeprefix(src_dir_prefix))
            files.update(dep.removeprefix(src_dir_prefix) for dep in deps)
    # Drop candidates (e.g. generated files or unset $(wildcard ...) entries)
    # that do not exist relative to the source directory.
    existing = {f for f in files if os.path.exists(src_dir / f)}
    return BuiltFilesResult(existing, src_dir)
def main():
    """Command-line entry point.

    Collects the files that contributed to the GKI kernel build found in
    GKI_KERNEL_OUT_DIR, optionally writes that list to --gki-files and, when
    VENDOR_TREE is given, compares each file against the vendor tree.

    Exits with status 2 when at least one file differs from (or cannot be
    compared with) the vendor tree.
    """
    parser = argparse.ArgumentParser(
        description='Compare the files used to compile a GKI kernel with those on a'
                    ' device kernel',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=textwrap.dedent('''
            Comparison of the trees is achieved by parsing the build output of the GKI kernel tree (GKI_KERNEL_OUT_DIR). The .cmd
            files list the file dependencies and give a good idea of which files are used when compiling the GKI kernel.

            Limitations:
              - Vendor kernel should have GKI kernel baseline merged into its tree for accurate diff reporting
              - Does not compare files which *would* be compiled into a vmlinux in a vendor kernel build.
                For instance, if vendor kernel has added obj-y += vendor_file.o, script would not detect such addition.
                Similarly, implicit Kconfig "select FOO" from a vendor module is not detected. The assumption
                is that such differences would be caught during other build steps or during test.
              - All .cmd from GKI_KERNEL_OUT_DIR are scanned. If extra build steps are run (e.g. menuconfig) or build is
                old, then extra files might be added to the list of GKI source files.
            '''))
    parser.add_argument('gki_out_dir', metavar='GKI_KERNEL_OUT_DIR',
                        type=pathlib.Path,
                        help='Location of the GKI kernel output folder.')
    parser.add_argument('--gki-files', type=pathlib.Path,
                        help='Location to write GKI files list to.')
    parser.add_argument('vendor_tree', metavar='VENDOR_TREE', nargs='?',
                        type=pathlib.Path,
                        help='Location of the vendor source tree. '
                             'If not provided,then just the list of GKI kernel '
                             'output files is generated.')
    parser.add_argument('--changed-files', type=pathlib.Path,
                        help='Location to write changed files list to.')
    args = parser.parse_args()

    built_files, source_dir = extract_built_files(args.gki_out_dir)
    print(f'There are {len(built_files)} source files contributing to the build '
          f'in {args.gki_out_dir}.')

    if args.gki_files:
        with open(args.gki_files, mode='w') as f:
            # Sorted so the written list is stable between runs.
            f.writelines(f'{file}\n' for file in sorted(built_files))

    if not args.vendor_tree:
        return

    # shallow=False forces a content comparison; the default only compares
    # os.stat() signatures when they happen to be identical.
    _, mismatch, errors = filecmp.cmpfiles(source_dir, args.vendor_tree,
                                           built_files, shallow=False)
    # Files that could not be compared (e.g. missing in the vendor tree) are
    # reported together with the ones whose content differs.
    diff_files = sorted({*mismatch, *errors})
    print(f'There are {len(diff_files)} source file(s) changed in vendor tree.')
    if diff_files:
        print('\n'.join(f'  {file}' for file in diff_files))

    if args.changed_files:
        with open(args.changed_files, mode='w') as f:
            f.writelines(f'{file}\n' for file in diff_files)

    if diff_files:
        sys.exit(2)


if __name__ == '__main__':
    main()