diff options
author | Mike Frysinger <vapier@google.com> | 2023-06-09 23:45:01 -0400 |
---|---|---|
committer | Mike Frysinger <vapier@google.com> | 2023-06-23 00:42:43 -0400 |
commit | c400921fda11c5c802b7d4890474e9ec5a7e579b (patch) | |
tree | 0782cf225d1903fa9a3b90b941587c2eefbe4679 | |
parent | 4473f017130d829a6d067908b3331a3595133a25 (diff) | |
download | repohooks-c400921fda11c5c802b7d4890474e9ec5a7e579b.tar.gz |
pre-upload: run hooks in parallel using futures
Run the hooks for each commit in parallel using a thread pool from
Python's concurrent.futures; commits themselves are still processed one
at a time. In the repohooks tree with 12 commits, this cuts execution
time from ~51 to ~31 secs.
In the libbrillo tree with 1 commit, it's still faster by a few
tens of msec. So this doesn't appear to negatively impact even
small sets of commits, while larger chains see a huge improvement.
Bug: None
Test: `./pre-upload.py` works and is faster
Change-Id: I347e5bdcd6206a768219769c51a6ddbaefefa44e
-rwxr-xr-x | pre-upload.py | 79 |
1 files changed, 52 insertions, 27 deletions
diff --git a/pre-upload.py b/pre-upload.py index daf3085..c539dfd 100755 --- a/pre-upload.py +++ b/pre-upload.py @@ -20,6 +20,7 @@ when developing. """ import argparse +import concurrent.futures import datetime import os import signal @@ -292,6 +293,7 @@ def _run_project_hooks_in_cwd( project_name: str, proj_dir: str, output: Output, + jobs: Optional[int] = None, from_git: bool = False, commit_list: Optional[List[str]] = None, ) -> rh.results.ProjectResults: @@ -301,6 +303,7 @@ def _run_project_hooks_in_cwd( project_name: The name of this project. proj_dir: The directory for this project (for passing on in metadata). output: Helper for summarizing output/errors to the user. + jobs: How many hooks to run in parallel. from_git: If true, we are called from git directly and repo should not be used. commit_list: A list of commits to run hooks against. If None or empty @@ -355,28 +358,40 @@ def _run_project_hooks_in_cwd( ignore_merged_commits=config.ignore_merged_commits) output.set_num_commits(len(commit_list)) - for commit in commit_list: - # Mix in some settings for our hooks. 
- os.environ['PREUPLOAD_COMMIT'] = commit - diff = rh.git.get_affected_files(commit) - desc = rh.git.get_commit_desc(commit) - os.environ['PREUPLOAD_COMMIT_MESSAGE'] = desc - - commit_summary = desc.split('\n', 1)[0] - output.commit_start(hooks, commit, commit_summary) - - for hook in hooks: - start = datetime.datetime.now() - hook_results = hook.hook(project, commit, desc, diff) - duration = datetime.datetime.now() - start - ret.add_results(hook_results) - (error, warning) = _process_hook_results(hook_results) - if error is not None or warning is not None: - if warning is not None: - output.hook_warning(hook, warning) - if error is not None: - output.hook_error(hook, error) - output.hook_finish(hook, duration) + def _run_hook(hook, project, commit, desc, diff): + """Run a hook, gather stats, and process its results.""" + start = datetime.datetime.now() + results = hook.hook(project, commit, desc, diff) + (error, warning) = _process_hook_results(results) + duration = datetime.datetime.now() - start + return (hook, results, error, warning, duration) + + with concurrent.futures.ThreadPoolExecutor(max_workers=jobs) as executor: + for commit in commit_list: + # Mix in some settings for our hooks. 
+ os.environ['PREUPLOAD_COMMIT'] = commit + diff = rh.git.get_affected_files(commit) + desc = rh.git.get_commit_desc(commit) + os.environ['PREUPLOAD_COMMIT_MESSAGE'] = desc + + commit_summary = desc.split('\n', 1)[0] + output.commit_start(hooks, commit, commit_summary) + + futures = ( + executor.submit(_run_hook, hook, project, commit, desc, diff) + for hook in hooks + ) + future_results = ( + x.result() for x in concurrent.futures.as_completed(futures) + ) + for hook, hook_results, error, warning, duration in future_results: + ret.add_results(hook_results) + if error is not None or warning is not None: + if warning is not None: + output.hook_warning(hook, warning) + if error is not None: + output.hook_error(hook, error) + output.hook_finish(hook, duration) _attempt_fixes(ret, commit_list) @@ -386,6 +401,7 @@ def _run_project_hooks_in_cwd( def _run_project_hooks( project_name: str, proj_dir: Optional[str] = None, + jobs: Optional[int] = None, from_git: bool = False, commit_list: Optional[List[str]] = None, ) -> rh.results.ProjectResults: @@ -395,6 +411,7 @@ def _run_project_hooks( project_name: The name of project to run hooks for. proj_dir: If non-None, this is the directory the project is in. If None, we'll ask repo. + jobs: How many hooks to run in parallel. from_git: If true, we are called from git directly and repo should not be used. commit_list: A list of commits to run hooks against. If None or empty @@ -426,9 +443,9 @@ def _run_project_hooks( try: # Hooks assume they are run from the root of the project. 
os.chdir(proj_dir) - return _run_project_hooks_in_cwd(project_name, proj_dir, output, - from_git=from_git, - commit_list=commit_list) + return _run_project_hooks_in_cwd( + project_name, proj_dir, output, jobs=jobs, from_git=from_git, + commit_list=commit_list) finally: output.finish() os.chdir(pwd) @@ -437,6 +454,7 @@ def _run_project_hooks( def _run_projects_hooks( project_list: List[str], worktree_list: List[Optional[str]], + jobs: Optional[int] = None, from_git: bool = False, commit_list: Optional[List[str]] = None, ) -> bool: @@ -445,6 +463,7 @@ def _run_projects_hooks( Args: project_list: List of project names. worktree_list: List of project checkouts. + jobs: How many hooks to run in parallel. from_git: If true, we are called from git directly and repo should not be used. commit_list: A list of commits to run hooks against. If None or empty @@ -459,6 +478,7 @@ def _run_projects_hooks( result = _run_project_hooks( project, proj_dir=worktree, + jobs=jobs, from_git=from_git, commit_list=commit_list, ) @@ -546,6 +566,11 @@ def direct_main(argv): 'hooks get run, since some hooks are project-specific.' 'If not specified, `repo` will be used to figure this ' 'out based on the dir.') + parser.add_argument('-j', '--jobs', type=int, + help='Run up to this many hooks in parallel. Setting ' + 'to 1 forces serial execution, and the default ' + 'automatically chooses an appropriate number for the ' + 'current system.') parser.add_argument('commits', nargs='*', help='Check specific commits') opts = parser.parse_args(argv) @@ -571,8 +596,8 @@ def direct_main(argv): parser.error(f"Couldn't identify the project of {opts.dir}") try: - if _run_projects_hooks([opts.project], [opts.dir], from_git=opts.git, - commit_list=opts.commits): + if _run_projects_hooks([opts.project], [opts.dir], jobs=opts.jobs, + from_git=opts.git, commit_list=opts.commits): return 0 except KeyboardInterrupt: print('Aborting execution early due to user interrupt', file=sys.stderr) |