diff options
author | Jordan R Abrahams-Whitehead <ajordanr@google.com> | 2022-05-21 02:21:53 +0000 |
---|---|---|
committer | Chromeos LUCI <chromeos-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2022-06-01 17:00:18 +0000 |
commit | 69f5ec0984e02c8a7f0748522a333fca57f357aa (patch) | |
tree | fcb712a011d7090bd496bec431ed9d692322880d | |
parent | 49d95342584d81f73b33cd25a178f0c8854cd826 (diff) | |
download | toolchain-utils-69f5ec0984e02c8a7f0748522a333fca57f357aa.tar.gz |
llvm_tools: Add patch_utils.py and unittests
This introduces the patch_utils.py library, which
contains various PATCHES.json and patch_manager
utilities that will be useful for future patch manager
restructuring.
In particular, patch_manager.py doesn't explain why its
patches fail, or give any information as to what is wrong with
its patch applications.
patch_utils.py provides the PatchEntry class, a self-contained
object that can provide this diagnostic information.
This module will later be incorporated into patch_manager.py
and get_upstream_patches.py.
BUG=b:188465085, b:227216280
TEST=./patch_utils_unittest.py
Change-Id: I6f6e24e6449ea68f6751fbcad14fca76c1bbaec8
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/toolchain-utils/+/3648887
Tested-by: Jordan Abrahams-Whitehead <ajordanr@google.com>
Reviewed-by: George Burgess <gbiv@chromium.org>
Commit-Queue: Jordan Abrahams-Whitehead <ajordanr@google.com>
-rw-r--r-- | llvm_tools/patch_utils.py | 214 | ||||
-rwxr-xr-x | llvm_tools/patch_utils_unittest.py | 178 |
2 files changed, 392 insertions, 0 deletions
# Copyright 2022 The ChromiumOS Authors.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Provides patch utilities for PATCHES.json file handling."""

import collections
import dataclasses
import io
from pathlib import Path
import re
import subprocess
import sys
from typing import Any, Dict, List, Optional


# Matches GNU patch's per-file "checking file <path>" report lines.
CHECKED_FILE_RE = re.compile(r'^checking file\s+(.*)$')
# Matches GNU patch's "Hunk #N FAILED at ..." report lines.
HUNK_FAILED_RE = re.compile(r'^Hunk #(\d+) FAILED at.*')
# Matches a unified-diff hunk header: @@ -start,len +start,len @@
HUNK_HEADER_RE = re.compile(r'^@@\s+-(\d+),(\d+)\s+\+(\d+),(\d+)\s+@@')
# Matches a bare "--" line, which terminates the final hunk in some patches.
HUNK_END_RE = re.compile(r'^--\s*$')
# Matches the "+++ a/..." or "+++ b/..." header naming the patched file.
PATCH_SUBFILE_HEADER_RE = re.compile(r'^\+\+\+ [ab]/(.*)$')


@dataclasses.dataclass
class Hunk:
  """Represents a patch Hunk."""
  # Hunk ID for the current file. 1-based; resets with each new file header,
  # matching the numbering GNU patch uses in its FAILED reports.
  hunk_id: int
  # Start line and length of the hunk in the original (pre-patch) file.
  orig_start: int
  orig_hunk_len: int
  # Start line and length of the hunk in the patched file.
  patch_start: int
  patch_hunk_len: int
  # 0-based line offsets of this hunk's body within the patch file itself.
  # `patch_hunk_lineno_end` remains None for an unterminated final hunk.
  patch_hunk_lineno_begin: int
  patch_hunk_lineno_end: Optional[int]


def parse_patch_stream(patch_stream: io.TextIOBase) -> Dict[str, List[Hunk]]:
  """Parse a patch file-like into Hunks.

  Args:
    patch_stream: A IO stream formatted like a git patch file.

  Returns:
    A dictionary mapping filenames to lists of Hunks present
    in the patch stream.

  Raises:
    RuntimeError: If a '+++' header has an empty path, or a hunk header
      appears before any file header.
  """

  current_filepath = None
  current_hunk_id = 0
  current_hunk = None
  out = collections.defaultdict(list)
  for lineno, line in enumerate(patch_stream.readlines()):
    subfile_header = PATCH_SUBFILE_HEADER_RE.match(line)
    if subfile_header:
      current_filepath = subfile_header.group(1)
      if not current_filepath:
        raise RuntimeError('Could not get file header in patch stream')
      # Need to reset the hunk id, as it's per-file.
      current_hunk_id = 0
      continue
    hunk_header = HUNK_HEADER_RE.match(line)
    if hunk_header:
      if not current_filepath:
        raise RuntimeError('Parsed hunk before file header in patch stream')
      if current_hunk:
        # Already parsing a hunk; this new header closes the previous one.
        current_hunk.patch_hunk_lineno_end = lineno
      current_hunk_id += 1
      current_hunk = Hunk(hunk_id=current_hunk_id,
                          orig_start=int(hunk_header.group(1)),
                          orig_hunk_len=int(hunk_header.group(2)),
                          patch_start=int(hunk_header.group(3)),
                          patch_hunk_len=int(hunk_header.group(4)),
                          patch_hunk_lineno_begin=lineno + 1,
                          patch_hunk_lineno_end=None)
      out[current_filepath].append(current_hunk)
      continue
    if current_hunk and HUNK_END_RE.match(line):
      current_hunk.patch_hunk_lineno_end = lineno
  return out


def parse_failed_patch_output(text: str) -> Dict[str, List[int]]:
  """Parse GNU patch output into a mapping of files to failed hunk ids.

  Args:
    text: Output of a `patch` invocation, in "checking file ..." format.

  Returns:
    A dict mapping each reported file path to the hunk ids that FAILED;
    files whose hunks all succeeded do not appear.

  Raises:
    ValueError: If a FAILED line is seen before any "checking file" line.
  """
  current_file = None
  failed_hunks = collections.defaultdict(list)
  for eline in text.split('\n'):
    checked_file_match = CHECKED_FILE_RE.match(eline)
    if checked_file_match:
      current_file = checked_file_match.group(1)
      continue
    failed_match = HUNK_FAILED_RE.match(eline)
    if failed_match:
      if not current_file:
        raise ValueError('Input stream was not parsable')
      hunk_id = int(failed_match.group(1))
      failed_hunks[current_file].append(hunk_id)
  return failed_hunks


@dataclasses.dataclass(frozen=True)
class PatchResult:
  """Result of a patch application."""
  succeeded: bool
  failed_hunks: Dict[str, List[Hunk]] = dataclasses.field(default_factory=dict)

  def __bool__(self):
    return self.succeeded


@dataclasses.dataclass
class PatchEntry:
  """Object mapping of an entry of PATCHES.json."""
  # Directory that rel_patch_path is resolved against.
  workdir: Path
  metadata: Dict[str, Any]
  platforms: List[str]
  rel_patch_path: str
  version_range: Dict[str, int]
  # Cache for parsed_hunks(). Deliberately left without an annotation so it
  # stays a plain class attribute rather than becoming a dataclass field
  # (keeps it out of __init__, __repr__, and __eq__).
  _parsed_hunks = None

  def __post_init__(self):
    if not self.workdir.is_dir():
      raise ValueError(f'workdir {self.workdir} is not a directory')

  @classmethod
  def from_dict(cls, workdir: Path, data: Dict[str, Any]):
    """Instantiate from a dictionary.

    Dictionary must have at least the following keys:

      {
        'metadata': {
          'title': '<title>'
        },
        'platforms': ['<platform>'],
        'rel_patch_path': '<relative patch path to workdir>',
        'version_range': {
          'from': <int>,
          'until': <int>,
        },
      }

    Returns:
      A new PatchEntry.
    """
    return cls(workdir, data['metadata'], data['platforms'],
               data['rel_patch_path'], data['version_range'])

  def to_dict(self) -> Dict[str, Any]:
    """Return the PATCHES.json-shaped dict for this entry."""
    return {
        'metadata': self.metadata,
        'platforms': self.platforms,
        'rel_patch_path': self.rel_patch_path,
        'version_range': self.version_range,
    }

  def parsed_hunks(self) -> Dict[str, List[Hunk]]:
    """Parse (and cache) the hunks contained in this entry's patch file."""
    # Minor caching here because IO is slow. Compare against None rather
    # than truthiness: an empty patch parses to {}, which is falsy and
    # would otherwise be re-read from disk on every call.
    if self._parsed_hunks is None:
      with self.patch_path().open(encoding='utf-8') as f:
        self._parsed_hunks = parse_patch_stream(f)
    return self._parsed_hunks

  def patch_path(self) -> Path:
    """Absolute-or-relative path to this entry's patch file."""
    return self.workdir / self.rel_patch_path

  def can_patch_version(self, svn_version: int) -> bool:
    """Is this patch meant to apply to `svn_version`?"""
    # Sometimes the key is there, but it's set to None.
    from_v = self.version_range.get('from') or 0
    until_v = self.version_range.get('until')
    if until_v is None:
      until_v = sys.maxsize
    # Half-open interval: [from, until).
    return from_v <= svn_version < until_v

  def is_old(self, svn_version: int) -> bool:
    """Is this patch old compared to `svn_version`?"""
    until_v = self.version_range.get('until')
    # Sometimes the key is there, but it's set to None.
    if until_v is None:
      until_v = sys.maxsize
    return svn_version >= until_v

  def apply(self,
            root_dir: Path,
            extra_args: Optional[List[str]] = None) -> PatchResult:
    """Apply a patch to a given directory.

    Args:
      root_dir: Directory `patch -d` runs in (e.g. an llvm-project checkout).
      extra_args: Extra flags passed to `patch` (e.g. ['--dry-run']).

    Returns:
      A PatchResult. On failure, failed_hunks maps each file reported by
      `patch` to the Hunk objects that did not apply.
    """
    if not extra_args:
      extra_args = []
    # Cmd to apply a patch in the src unpack path.
    cmd = [
        'patch', '-d',
        root_dir.absolute(), '-f', '-p1', '--no-backup-if-mismatch', '-i',
        self.patch_path().absolute()
    ] + extra_args
    try:
      subprocess.run(cmd, encoding='utf-8', check=True, stdout=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
      parsed_hunks = self.parsed_hunks()
      failed_hunks_id_dict = parse_failed_patch_output(e.stdout)
      failed_hunks = {}
      for path, failed_hunk_ids in failed_hunks_id_dict.items():
        # NOTE(review): assumes every path `patch` reports also appears in
        # the parsed patch file; a -p1 prefix mismatch would KeyError here.
        hunks_for_file = parsed_hunks[path]
        failed_hunks[path] = [
            hunk for hunk in hunks_for_file if hunk.hunk_id in failed_hunk_ids
        ]
      return PatchResult(succeeded=False, failed_hunks=failed_hunks)
    return PatchResult(succeeded=True)

  def test_apply(self, root_dir: Path) -> PatchResult:
    """Dry run applying a patch to a given directory."""
    return self.apply(root_dir, ['--dry-run'])

  def title(self) -> str:
    """Human-readable title of this patch, from its metadata."""
    return self.metadata['title']
#!/usr/bin/env python3
# Copyright 2022 The ChromiumOS Authors.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Unit tests for the patch_utils.py file."""

from pathlib import Path
import unittest
import unittest.mock as mock

import patch_utils as pu


class TestPatchUtils(unittest.TestCase):
  """Test the patch_utils."""

  def test_from_to_dict(self):
    """Test to and from dict conversion."""
    d = TestPatchUtils._default_json_dict()
    # Extra metadata keys beyond 'title' must round-trip untouched.
    d['metadata'] = {
        'title': 'hello world',
        'info': [],
        'other_extra_info': {
            'extra_flags': [],
        }
    }
    e = pu.PatchEntry.from_dict(TestPatchUtils._mock_dir(), d)
    self.assertEqual(d, e.to_dict())

  def test_can_patch_version(self):
    """Test that patch application based on version is correct."""
    base_dict = TestPatchUtils._default_json_dict()
    workdir = TestPatchUtils._mock_dir()
    # Full range {'from': 4, 'until': 9}: half-open interval [4, 9).
    e1 = pu.PatchEntry.from_dict(workdir, base_dict)
    self.assertFalse(e1.can_patch_version(3))
    self.assertTrue(e1.can_patch_version(4))
    self.assertTrue(e1.can_patch_version(5))
    self.assertFalse(e1.can_patch_version(9))
    # Missing 'from' key: unbounded below.
    base_dict['version_range'] = {'until': 9}
    e2 = pu.PatchEntry.from_dict(workdir, base_dict)
    self.assertTrue(e2.can_patch_version(0))
    self.assertTrue(e2.can_patch_version(5))
    self.assertFalse(e2.can_patch_version(9))
    # Missing 'until' key: unbounded above.
    base_dict['version_range'] = {'from': 4}
    e3 = pu.PatchEntry.from_dict(workdir, base_dict)
    self.assertFalse(e3.can_patch_version(3))
    self.assertTrue(e3.can_patch_version(5))
    self.assertTrue(e3.can_patch_version(1 << 31))
    # Keys present but explicitly None must behave like missing keys.
    base_dict['version_range'] = {'from': 4, 'until': None}
    e4 = pu.PatchEntry.from_dict(workdir, base_dict)
    self.assertFalse(e4.can_patch_version(3))
    self.assertTrue(e4.can_patch_version(5))
    self.assertTrue(e4.can_patch_version(1 << 31))
    base_dict['version_range'] = {'from': None, 'until': 9}
    e5 = pu.PatchEntry.from_dict(workdir, base_dict)
    self.assertTrue(e5.can_patch_version(0))
    self.assertTrue(e5.can_patch_version(5))
    self.assertFalse(e5.can_patch_version(9))

  def test_parsed_hunks(self):
    """Test that we can parse patch file hunks."""
    m = mock.mock_open(read_data=_EXAMPLE_PATCH)

    # Path.open is an unbound method, so the wrapper forwards the Path
    # instance (`self`) on to the mock_open mock.
    def mocked_open(self, *args, **kwargs):
      return m(self, *args, **kwargs)

    with mock.patch.object(Path, 'open', mocked_open):
      e = pu.PatchEntry.from_dict(TestPatchUtils._mock_dir(),
                                  TestPatchUtils._default_json_dict())
      hunk_dict = e.parsed_hunks()

    m.assert_called()
    filename1 = 'clang/lib/Driver/ToolChains/Clang.cpp'
    filename2 = 'llvm/lib/Passes/PassBuilder.cpp'
    self.assertEqual(set(hunk_dict.keys()), {filename1, filename2})
    hunk_list1 = hunk_dict[filename1]
    hunk_list2 = hunk_dict[filename2]
    self.assertEqual(len(hunk_list1), 1)
    self.assertEqual(len(hunk_list2), 2)

  def test_apply_success(self):
    """Test that we can call apply."""
    src_dir = TestPatchUtils._mock_dir('somewhere/llvm-project')
    patch_dir = TestPatchUtils._mock_dir()
    e = pu.PatchEntry.from_dict(patch_dir, TestPatchUtils._default_json_dict())
    # Mock out subprocess.run so no real `patch` binary is invoked; a
    # non-raising run means success.
    with mock.patch('subprocess.run', mock.MagicMock()):
      result = e.apply(src_dir)
    self.assertTrue(result.succeeded)

  def test_parse_failed_patch_output(self):
    """Test that we can call parse `patch` output."""
    # Mimics GNU patch's report format; only FAILED hunks should be kept.
    fixture = """
checking file a/b/c.cpp
Hunk #1 SUCCEEDED at 96 with fuzz 1.
Hunk #12 FAILED at 77.
Hunk #42 FAILED at 1979.
checking file x/y/z.h
Hunk #4 FAILED at 30.
checking file works.cpp
Hunk #1 SUCCEEDED at 96 with fuzz 1.
"""
    result = pu.parse_failed_patch_output(fixture)
    self.assertEqual(result['a/b/c.cpp'], [12, 42])
    self.assertEqual(result['x/y/z.h'], [4])
    self.assertNotIn('works.cpp', result)

  @staticmethod
  def _default_json_dict():
    # Minimal PATCHES.json entry accepted by PatchEntry.from_dict.
    return {
        'metadata': {
            'title': 'hello world',
        },
        'platforms': [],
        'rel_patch_path': 'x/y/z',
        'version_range': {
            'from': 4,
            'until': 9,
        }
    }

  @staticmethod
  def _mock_dir(path: str = 'a/b/c'):
    # A MagicMock standing in for a directory Path: is_dir() always True
    # (so PatchEntry.__post_init__ passes) and '/' joins like a real Path.
    workdir = Path(path)
    workdir = mock.MagicMock(workdir)
    workdir.is_dir = lambda: True
    workdir.joinpath = lambda x: Path(path).joinpath(x)
    workdir.__truediv__ = lambda self, x: self.joinpath(x)
    return workdir


# Example patch touching two files: one hunk in Clang.cpp, two in
# PassBuilder.cpp — the counts test_parsed_hunks asserts on.
_EXAMPLE_PATCH = """
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 5620a543438..099eb769ca5 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3995,8 +3995,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       Args.hasArg(options::OPT_dA))
     CmdArgs.push_back("-masm-verbose");
 
-  if (!TC.useIntegratedAs())
+  if (!TC.useIntegratedAs()) {
     CmdArgs.push_back("-no-integrated-as");
+    CmdArgs.push_back("-mllvm");
+    CmdArgs.push_back("-enable-call-graph-profile-sort=false");
+  }
 
   if (Args.hasArg(options::OPT_fdebug_pass_structure)) {
     CmdArgs.push_back("-mdebug-pass");
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index c5fd68299eb..4c6e15eeeb9 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -212,6 +212,10 @@ static cl::opt<bool>
     EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
               cl::desc("Enable control height reduction optimization (CHR)"));
 
+static cl::opt<bool> EnableCallGraphProfileSort(
+    "enable-call-graph-profile-sort", cl::init(true), cl::Hidden,
+    cl::desc("Enable call graph profile pass for the new PM (default = on)"));
+
 extern cl::opt<bool> EnableHotColdSplit;
 extern cl::opt<bool> EnableOrderFileInstrumentation;
 
@@ -939,7 +943,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
   // Add the core optimizing pipeline.
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
 
-  MPM.addPass(CGProfilePass());
+  if (EnableCallGraphProfileSort)
+    MPM.addPass(CGProfilePass());
 
   // Now we need to do some global optimization transforms.
   // FIXME: It would seem like these should come first in the optimization
"""

if __name__ == '__main__':
  unittest.main()