aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordan R Abrahams-Whitehead <ajordanr@google.com>2022-05-21 02:21:53 +0000
committerChromeos LUCI <chromeos-scoped@luci-project-accounts.iam.gserviceaccount.com>2022-06-01 17:00:18 +0000
commit69f5ec0984e02c8a7f0748522a333fca57f357aa (patch)
treefcb712a011d7090bd496bec431ed9d692322880d
parent49d95342584d81f73b33cd25a178f0c8854cd826 (diff)
downloadtoolchain-utils-69f5ec0984e02c8a7f0748522a333fca57f357aa.tar.gz
llvm_tools: Add patch_utils.py and unittests
This introduces the patch_utils.py library, which contains various PATCHES.json and patch_manager utilities that will be useful for future patch manager restructuring. In particular, patch_manager.py doesn't explain why its patches fail, or give any information as to what is wrong with its patch applications. patch_utils.py provides the PatchEntry class, which is a self contained object which can provide this diagnostic information. This module will later be incorporated into patch_manager.py and get_upstream_patches.py BUG=b:188465085, b:227216280 TEST=./patch_utils_unittest.py Change-Id: I6f6e24e6449ea68f6751fbcad14fca76c1bbaec8 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/toolchain-utils/+/3648887 Tested-by: Jordan Abrahams-Whitehead <ajordanr@google.com> Reviewed-by: George Burgess <gbiv@chromium.org> Commit-Queue: Jordan Abrahams-Whitehead <ajordanr@google.com>
-rw-r--r--llvm_tools/patch_utils.py214
-rwxr-xr-xllvm_tools/patch_utils_unittest.py178
2 files changed, 392 insertions, 0 deletions
diff --git a/llvm_tools/patch_utils.py b/llvm_tools/patch_utils.py
new file mode 100644
index 00000000..2f282990
--- /dev/null
+++ b/llvm_tools/patch_utils.py
@@ -0,0 +1,214 @@
+# Copyright 2022 The ChromiumOS Authors.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Provides patch utilities for PATCHES.json file handling."""
+
+import collections
+import dataclasses
+import io
+from pathlib import Path
+import re
+import subprocess
+import sys
+from typing import Any, Dict, List, Optional
+
+
# Output line from `patch` naming the file currently being checked.
CHECKED_FILE_RE = re.compile(r'^checking file\s+(.*)$')
# Output line from `patch` reporting a hunk that failed to apply;
# captures the (1-based, per-file) hunk ID.
HUNK_FAILED_RE = re.compile(r'^Hunk #(\d+) FAILED at.*')
# Unified-diff hunk header, e.g. '@@ -212,6 +212,10 @@'; captures the
# original start/length and the patched start/length.
# NOTE(review): this requires both ',len' parts, which unified diffs may
# omit when the length is 1 — confirm that's acceptable for our patches.
HUNK_HEADER_RE = re.compile(r'^@@\s+-(\d+),(\d+)\s+\+(\d+),(\d+)\s+@@')
# A line containing only '--' (plus optional trailing whitespace);
# treated by parse_patch_stream as terminating the current hunk.
HUNK_END_RE = re.compile(r'^--\s*$')
# '+++ a/...' / '+++ b/...' header introducing a sub-file inside a
# patch; captures the file path (with the a/ or b/ prefix stripped).
PATCH_SUBFILE_HEADER_RE = re.compile(r'^\+\+\+ [ab]/(.*)$')
+
+
@dataclasses.dataclass
class Hunk:
  """A single hunk of a patch, scoped to one patched file."""
  # 1-based hunk ID; restarts for each file within the patch.
  hunk_id: int
  # '-' side of the hunk header: start line in the original file.
  orig_start: int
  # '-' side of the hunk header: line count in the original file.
  orig_hunk_len: int
  # '+' side of the hunk header: start line after patching.
  patch_start: int
  # '+' side of the hunk header: line count after patching.
  patch_hunk_len: int
  # Line numbers (0-based) delimiting this hunk's body within the patch
  # file itself. The end is None when the stream ended before a
  # terminator or a following hunk header was seen.
  patch_hunk_lineno_begin: int
  patch_hunk_lineno_end: Optional[int]
+
+
def parse_patch_stream(patch_stream: io.TextIOBase) -> Dict[str, List[Hunk]]:
  """Parse a patch file-like object into Hunks, grouped by file.

  Args:
    patch_stream: An IO stream formatted like a git patch file.

  Returns:
    A dictionary mapping each patched filename to the list of Hunks
    found for it in the stream.

  Raises:
    RuntimeError: A hunk header appeared before any file header, or a
      file header had an empty path.
  """
  hunks_by_file = collections.defaultdict(list)
  filepath = None
  hunk_id = 0
  open_hunk = None
  for lineno, line in enumerate(patch_stream.readlines()):
    header_match = PATCH_SUBFILE_HEADER_RE.match(line)
    if header_match:
      filepath = header_match.group(1)
      if not filepath:
        raise RuntimeError('Could not get file header in patch stream')
      # Hunk IDs are per-file, so restart the counter.
      hunk_id = 0
      continue
    hunk_match = HUNK_HEADER_RE.match(line)
    if hunk_match:
      if not filepath:
        raise RuntimeError('Parsed hunk before file header in patch stream')
      if open_hunk:
        # The previous hunk ends where this new hunk's header begins.
        open_hunk.patch_hunk_lineno_end = lineno
      hunk_id += 1
      open_hunk = Hunk(hunk_id=hunk_id,
                       orig_start=int(hunk_match.group(1)),
                       orig_hunk_len=int(hunk_match.group(2)),
                       patch_start=int(hunk_match.group(3)),
                       patch_hunk_len=int(hunk_match.group(4)),
                       patch_hunk_lineno_begin=lineno + 1,
                       patch_hunk_lineno_end=None)
      hunks_by_file[filepath].append(open_hunk)
      continue
    # An explicit '--' terminator also closes the hunk in progress.
    if open_hunk and HUNK_END_RE.match(line):
      open_hunk.patch_hunk_lineno_end = lineno
  return hunks_by_file
+
+
def parse_failed_patch_output(text: str) -> Dict[str, List[int]]:
  """Extract per-file failed hunk IDs from `patch` program output.

  Args:
    text: Output text from a `patch` invocation, e.g. the captured
      stdout of a dry run.

  Returns:
    A dictionary mapping each file path that had failures to the list
    of hunk IDs which failed for it. Files whose hunks all succeeded
    are absent from the result.

  Raises:
    ValueError: A hunk failure was reported before any file header.
  """
  failures = collections.defaultdict(list)
  current_file = None
  for line in text.split('\n'):
    file_match = CHECKED_FILE_RE.match(line)
    if file_match:
      current_file = file_match.group(1)
      continue
    hunk_match = HUNK_FAILED_RE.match(line)
    if hunk_match:
      if not current_file:
        raise ValueError('Input stream was not parsable')
      failures[current_file].append(int(hunk_match.group(1)))
  return failures
+
+
@dataclasses.dataclass(frozen=True)
class PatchResult:
  """Outcome of applying (or dry-running) a single patch.

  Truthy when the patch applied cleanly, falsy otherwise.
  """
  # True iff `patch` exited successfully.
  succeeded: bool
  # Maps each affected file path to the hunks that failed to apply.
  # Empty when `succeeded` is True.
  failed_hunks: Dict[str, List[Hunk]] = dataclasses.field(
      default_factory=dict)

  def __bool__(self) -> bool:
    return self.succeeded
+
+
@dataclasses.dataclass
class PatchEntry:
  """Object mapping of an entry of PATCHES.json."""
  # Directory that rel_patch_path is resolved against.
  workdir: Path
  # Freeform patch metadata; title() expects a 'title' key.
  metadata: Dict[str, Any]
  # Names of the platforms this patch applies to.
  platforms: List[str]
  # Location of the patch file, relative to workdir.
  rel_patch_path: str
  # Optional 'from' (inclusive) and 'until' (exclusive) SVN version
  # bounds. Either key may be absent or explicitly None (unbounded).
  version_range: Dict[str, int]
  # Cache for parsed_hunks(). Deliberately a plain class attribute (no
  # annotation) so dataclasses does not turn it into an __init__ field.
  _parsed_hunks = None

  def __post_init__(self):
    if not self.workdir.is_dir():
      raise ValueError(f'workdir {self.workdir} is not a directory')

  @classmethod
  def from_dict(cls, workdir: Path, data: Dict[str, Any]):
    """Instantiate from a dictionary.

    Dictionary must have at least the following keys:

      {
        'metadata': {
          'title': '<title>'
        },
        'platforms': ['<platform>'],
        'rel_patch_path': '<relative patch path to workdir>',
        'version_range': {
          'from': <int>,
          'until': <int>,
        },
      }

    Returns:
      A new PatchEntry.
    """
    return cls(workdir, data['metadata'], data['platforms'],
               data['rel_patch_path'], data['version_range'])

  def to_dict(self) -> Dict[str, Any]:
    """Return the PATCHES.json-style dictionary form of this entry."""
    return {
        'metadata': self.metadata,
        'platforms': self.platforms,
        'rel_patch_path': self.rel_patch_path,
        'version_range': self.version_range,
    }

  def parsed_hunks(self) -> Dict[str, List[Hunk]]:
    """Parse the patch file into Hunks, keyed by patched filename."""
    # Minor caching here because IO is slow. Compare against None (not
    # truthiness) so that an empty parse result is also cached, instead
    # of re-reading the patch file on every call.
    if self._parsed_hunks is None:
      with self.patch_path().open(encoding='utf-8') as f:
        self._parsed_hunks = parse_patch_stream(f)
    return self._parsed_hunks

  def patch_path(self) -> Path:
    """Return the path to the patch file itself."""
    return self.workdir / self.rel_patch_path

  def _until_version(self) -> int:
    """Return the exclusive upper version bound (maxsize if unbounded)."""
    # Sometimes the key is there, but it's set to None.
    until_v = self.version_range.get('until')
    return sys.maxsize if until_v is None else until_v

  def can_patch_version(self, svn_version: int) -> bool:
    """Is this patch meant to apply to `svn_version`?"""
    # Sometimes the key is there, but it's set to None.
    from_v = self.version_range.get('from') or 0
    return from_v <= svn_version < self._until_version()

  def is_old(self, svn_version: int) -> bool:
    """Is this patch old compared to `svn_version`?"""
    return svn_version >= self._until_version()

  def apply(self,
            root_dir: Path,
            extra_args: Optional[List[str]] = None) -> PatchResult:
    """Apply a patch to a given directory.

    Args:
      root_dir: Directory passed to `patch -d`; hunk paths inside the
        patch are resolved against it (with one component stripped, -p1).
      extra_args: Extra flags for the `patch` invocation, e.g.
        ['--dry-run'].

    Returns:
      A PatchResult. On failure it carries the Hunks (per file) that
      did not apply.
    """
    if not extra_args:
      extra_args = []
    # Cmd to apply a patch in the src unpack path.
    cmd = [
        'patch', '-d',
        root_dir.absolute(), '-f', '-p1', '--no-backup-if-mismatch', '-i',
        self.patch_path().absolute()
    ] + extra_args
    try:
      # stdout is captured so that a failure can be diagnosed below;
      # `patch` reports per-hunk results there.
      subprocess.run(cmd, encoding='utf-8', check=True, stdout=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
      parsed_hunks = self.parsed_hunks()
      failed_hunks_id_dict = parse_failed_patch_output(e.stdout)
      failed_hunks = {}
      for path, failed_hunk_ids in failed_hunks_id_dict.items():
        # NOTE(review): assumes every path `patch` reports also appears
        # in our own parse of the patch file; a mismatch raises KeyError.
        hunks_for_file = parsed_hunks[path]
        failed_hunks[path] = [
            hunk for hunk in hunks_for_file if hunk.hunk_id in failed_hunk_ids
        ]
      return PatchResult(succeeded=False, failed_hunks=failed_hunks)
    return PatchResult(succeeded=True)

  def test_apply(self, root_dir: Path) -> PatchResult:
    """Dry run applying a patch to a given directory."""
    return self.apply(root_dir, ['--dry-run'])

  def title(self) -> str:
    """Return the patch's title from its metadata."""
    return self.metadata['title']
diff --git a/llvm_tools/patch_utils_unittest.py b/llvm_tools/patch_utils_unittest.py
new file mode 100755
index 00000000..3dfe52b2
--- /dev/null
+++ b/llvm_tools/patch_utils_unittest.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+# Copyright 2022 The ChromiumOS Authors.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Unit tests for the patch_utils.py file."""
+
+from pathlib import Path
+import unittest
+import unittest.mock as mock
+
+import patch_utils as pu
+
+
class TestPatchUtils(unittest.TestCase):
  """Test the patch_utils."""

  def test_from_to_dict(self):
    """Test to and from dict conversion."""
    d = TestPatchUtils._default_json_dict()
    # Arbitrary extra metadata keys must survive the from_dict/to_dict
    # round trip unchanged.
    d['metadata'] = {
        'title': 'hello world',
        'info': [],
        'other_extra_info': {
            'extra_flags': [],
        }
    }
    e = pu.PatchEntry.from_dict(TestPatchUtils._mock_dir(), d)
    self.assertEqual(d, e.to_dict())

  def test_can_patch_version(self):
    """Test that patch application based on version is correct."""
    base_dict = TestPatchUtils._default_json_dict()
    workdir = TestPatchUtils._mock_dir()
    # Default fixture range is {'from': 4, 'until': 9}: inclusive lower
    # bound, exclusive upper bound.
    e1 = pu.PatchEntry.from_dict(workdir, base_dict)
    self.assertFalse(e1.can_patch_version(3))
    self.assertTrue(e1.can_patch_version(4))
    self.assertTrue(e1.can_patch_version(5))
    self.assertFalse(e1.can_patch_version(9))
    # A missing 'from' key means "no lower bound".
    base_dict['version_range'] = {'until': 9}
    e2 = pu.PatchEntry.from_dict(workdir, base_dict)
    self.assertTrue(e2.can_patch_version(0))
    self.assertTrue(e2.can_patch_version(5))
    self.assertFalse(e2.can_patch_version(9))
    # A missing 'until' key means "no upper bound".
    base_dict['version_range'] = {'from': 4}
    e3 = pu.PatchEntry.from_dict(workdir, base_dict)
    self.assertFalse(e3.can_patch_version(3))
    self.assertTrue(e3.can_patch_version(5))
    self.assertTrue(e3.can_patch_version(1 << 31))
    # Keys explicitly set to None behave the same as missing keys.
    base_dict['version_range'] = {'from': 4, 'until': None}
    e4 = pu.PatchEntry.from_dict(workdir, base_dict)
    self.assertFalse(e4.can_patch_version(3))
    self.assertTrue(e4.can_patch_version(5))
    self.assertTrue(e4.can_patch_version(1 << 31))
    base_dict['version_range'] = {'from': None, 'until': 9}
    e5 = pu.PatchEntry.from_dict(workdir, base_dict)
    self.assertTrue(e5.can_patch_version(0))
    self.assertTrue(e5.can_patch_version(5))
    self.assertFalse(e5.can_patch_version(9))

  def test_parsed_hunks(self):
    """Test that we can parse patch file hunks."""
    m = mock.mock_open(read_data=_EXAMPLE_PATCH)

    # Path.open is patched with a plain function (rather than the mock
    # itself) so the Path instance is forwarded as the first argument.
    def mocked_open(self, *args, **kwargs):
      return m(self, *args, **kwargs)

    with mock.patch.object(Path, 'open', mocked_open):
      e = pu.PatchEntry.from_dict(TestPatchUtils._mock_dir(),
                                  TestPatchUtils._default_json_dict())
      hunk_dict = e.parsed_hunks()

    m.assert_called()
    # _EXAMPLE_PATCH touches two files: one hunk in the first, two in
    # the second.
    filename1 = 'clang/lib/Driver/ToolChains/Clang.cpp'
    filename2 = 'llvm/lib/Passes/PassBuilder.cpp'
    self.assertEqual(set(hunk_dict.keys()), {filename1, filename2})
    hunk_list1 = hunk_dict[filename1]
    hunk_list2 = hunk_dict[filename2]
    self.assertEqual(len(hunk_list1), 1)
    self.assertEqual(len(hunk_list2), 2)

  def test_apply_success(self):
    """Test that we can call apply."""
    src_dir = TestPatchUtils._mock_dir('somewhere/llvm-project')
    patch_dir = TestPatchUtils._mock_dir()
    e = pu.PatchEntry.from_dict(patch_dir, TestPatchUtils._default_json_dict())
    # subprocess.run is mocked out, so `patch` never actually runs;
    # "success" is simply the absence of a CalledProcessError.
    with mock.patch('subprocess.run', mock.MagicMock()):
      result = e.apply(src_dir)
    self.assertTrue(result.succeeded)

  def test_parse_failed_patch_output(self):
    """Test that we can call parse `patch` output."""
    fixture = """
checking file a/b/c.cpp
Hunk #1 SUCCEEDED at 96 with fuzz 1.
Hunk #12 FAILED at 77.
Hunk #42 FAILED at 1979.
checking file x/y/z.h
Hunk #4 FAILED at 30.
checking file works.cpp
Hunk #1 SUCCEEDED at 96 with fuzz 1.
"""
    result = pu.parse_failed_patch_output(fixture)
    self.assertEqual(result['a/b/c.cpp'], [12, 42])
    self.assertEqual(result['x/y/z.h'], [4])
    # Files whose hunks all succeeded are not reported at all.
    self.assertNotIn('works.cpp', result)

  @staticmethod
  def _default_json_dict():
    # Minimal PATCHES.json-style entry used as the base fixture by the
    # tests above.
    return {
        'metadata': {
            'title': 'hello world',
        },
        'platforms': [],
        'rel_patch_path': 'x/y/z',
        'version_range': {
            'from': 4,
            'until': 9,
        }
    }

  @staticmethod
  def _mock_dir(path: str = 'a/b/c'):
    # A MagicMock specced like a Path: is_dir() always satisfies
    # PatchEntry.__post_init__, while joinpath and '/' still produce
    # real Path objects.
    workdir = Path(path)
    workdir = mock.MagicMock(workdir)
    workdir.is_dir = lambda: True
    workdir.joinpath = lambda x: Path(path).joinpath(x)
    workdir.__truediv__ = lambda self, x: self.joinpath(x)
    return workdir
+
+
# Patch fixture used by test_parsed_hunks: one hunk against
# clang/lib/Driver/ToolChains/Clang.cpp and two hunks against
# llvm/lib/Passes/PassBuilder.cpp.
_EXAMPLE_PATCH = """
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 5620a543438..099eb769ca5 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3995,8 +3995,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 Args.hasArg(options::OPT_dA))
 CmdArgs.push_back("-masm-verbose");

- if (!TC.useIntegratedAs())
+ if (!TC.useIntegratedAs()) {
 CmdArgs.push_back("-no-integrated-as");
+ CmdArgs.push_back("-mllvm");
+ CmdArgs.push_back("-enable-call-graph-profile-sort=false");
+ }

 if (Args.hasArg(options::OPT_fdebug_pass_structure)) {
 CmdArgs.push_back("-mdebug-pass");
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index c5fd68299eb..4c6e15eeeb9 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -212,6 +212,10 @@ static cl::opt<bool>
 EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
 cl::desc("Enable control height reduction optimization (CHR)"));

+static cl::opt<bool> EnableCallGraphProfileSort(
+ "enable-call-graph-profile-sort", cl::init(true), cl::Hidden,
+ cl::desc("Enable call graph profile pass for the new PM (default = on)"));
+
 extern cl::opt<bool> EnableHotColdSplit;
 extern cl::opt<bool> EnableOrderFileInstrumentation;

@@ -939,7 +943,8 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
 // Add the core optimizing pipeline.
 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));

- MPM.addPass(CGProfilePass());
+ if (EnableCallGraphProfileSort)
+ MPM.addPass(CGProfilePass());

 // Now we need to do some global optimization transforms.
 // FIXME: It would seem like these should come first in the optimization
"""

# Allow running this module directly as a script.
if __name__ == '__main__':
  unittest.main()