diff options
author | Yu-Ju Hong <yjhong@chromium.org> | 2014-11-07 12:59:54 -0800 |
---|---|---|
committer | chrome-internal-fetch <chrome-internal-fetch@google.com> | 2014-11-12 02:42:03 +0000 |
commit | 20808a86ba340766ddf409282a1bfcba5b541f46 (patch) | |
tree | fa32494a6f96f8d6bf6b360f882b25dabd9c1aeb /cbuildbot/stages/completion_stages.py | |
parent | ef6ccb2b11698c5adcfce60e45270ce80f8e131a (diff) | |
download | chromite-20808a86ba340766ddf409282a1bfcba5b541f46.tar.gz |
Determine fully verified changes based on per-config change information
Now that each CQ slave records relevant/irrelevant CLs in CIDB, the
next step is to utilize the board-specific information to submit and
reject changes:
1. All fully-verified changes should be submitted, even upon slave
failures. This should increase the submission rate.
2. Only CLs relevant to the failed build/slave should be considered
suspects and potentially be rejected. This should improve the
false rejection rate.
This CL implements the submission logic in (1), while the rejection
logic (2) will be implemented in a separate CL.
The new board-specific submission logic will co-exist with the
existing logic until we verify that there is no regression. This means
that even though CQ master runs the new submission logic, it will
still use the old logic to submit CLs.
Side effects of this CL:
- Migrate CommitQueueCompletionStage unittest from mox to mock.
- Move GetOptionForChange and its related functions from validation_pool
to triage_lib.
- Fix patch.GetChangesAsString to print internal changes with a
prefix (*).
BUG=chromium:422639
TEST=`cbuildbot/run_tests`
TEST=`cbuildbot --remote --debug master-paladin`
Change-Id: Ic018175b808a3029727547261a471c6d35d2c1f3
Reviewed-on: https://chromium-review.googlesource.com/228880
Tested-by: Yu-Ju Hong <yjhong@chromium.org>
Reviewed-by: Aviv Keshet <akeshet@chromium.org>
Commit-Queue: Yu-Ju Hong <yjhong@chromium.org>
Diffstat (limited to 'cbuildbot/stages/completion_stages.py')
-rw-r--r-- | cbuildbot/stages/completion_stages.py | 72 |
1 file changed, 66 insertions, 6 deletions
diff --git a/cbuildbot/stages/completion_stages.py b/cbuildbot/stages/completion_stages.py index 225ae3c37..29cb99336 100644 --- a/cbuildbot/stages/completion_stages.py +++ b/cbuildbot/stages/completion_stages.py @@ -21,6 +21,7 @@ from chromite.cbuildbot import validation_pool from chromite.cbuildbot.stages import generic_stages from chromite.cbuildbot.stages import sync_stages from chromite.lib import alerts +from chromite.lib import clactions from chromite.lib import cros_build_lib from chromite.lib import git from chromite.lib import portage_util @@ -568,6 +569,64 @@ class CommitQueueCompletionStage(MasterSlaveSyncCompletionStage): if self._run.config.master: self.CQMasterHandleFailure(failing, inflight, no_stat) + def _GetSlaveMappingAndCLActions(self, changes): + """Query CIDB for slaves and CL actions. + + Args: + changes: A list of GerritPatch instances to examine. + + Returns: + A tuple of (config_map, action_history), where the config_map + is a dictionary mapping build_id to config name for all slaves + in this run, and action_history is a list of all CL actions + associated with |changes|. + """ + # build_id is the master build id for the run. + build_id, db = self._run.GetCIDBHandle() + assert db, 'No database connection to use.' + slave_list = db.GetSlaveStatuses(build_id) + action_history = db.GetActionsForChanges(changes) + + config_map = dict() + # Build the build_id to config_name mapping. Note that if we add the + # "relaunch" feature in cbuildbot, there may be multiple build ids + # for the same slave config. We will have to make sure + # GetSlaveStatuses() returns only the valid slaves (e.g. with + # latest start time). + for d in slave_list: + config_map[d['id']] = d['build_config'] + + return config_map, action_history + + def GetRelevantChangesForSlaves(self, changes, no_stat): + """Compile a set of relevant changes for each slave. + + Args: + changes: A list of GerritPatch instances to examine.
+ no_stat: Set of builder names of slave builders that had status None. + + Returns: + A dictionary mapping a slave config name to a set of relevant changes. + """ + # Retrieve the slaves and clactions from CIDB. + config_map, action_history = self._GetSlaveMappingAndCLActions(changes) + changes_by_build_id = clactions.GetRelevantChangesForBuilds( + changes, action_history, config_map.keys()) + + # Convert index from build_ids to config names. + changes_by_config = dict() + for k, v in changes_by_build_id.iteritems(): + changes_by_config[config_map[k]] = v + + for config in no_stat: + # If a slave is in |no_stat|, it means that the slave never + # finished applying the changes in the sync stage. Hence the CL + # pickup actions for this slave may be + # inaccurate. Conservatively assume all changes are relevant. + changes_by_config[config] = set(changes) + + return changes_by_config + def CQMasterHandleFailure(self, failing, inflight, no_stat): """Handle changes in the validation pool upon build failure or timeout. @@ -581,15 +640,16 @@ class CommitQueueCompletionStage(MasterSlaveSyncCompletionStage): no_stat: Set of builder names of slave builders that had status None. """ messages = self._GetFailedMessages(failing) + self.SendInfraAlertIfNeeded(failing, inflight, no_stat) + # Start with all the changes in the validation pool. changes = self.sync_stage.pool.changes + changes_by_config = self.GetRelevantChangesForSlaves(changes, no_stat) - self.SendInfraAlertIfNeeded(failing, inflight, no_stat) - - if failing and not inflight and not no_stat: - # Even if there was a failure, we can submit the changes that indicate - # that they don't care about this failure. - changes = self.sync_stage.pool.SubmitPartialPool(messages) + # Even if there was a failure, we can submit the changes that indicate + # that they don't care about this failure. 
+ changes = self.sync_stage.pool.SubmitPartialPool( + changes, messages, changes_by_config, failing, inflight, no_stat) tot_sanity = self._ToTSanity( self._run.config.sanity_check_slaves, self._slave_statuses) |