diff options
author | Yu-Ju Hong <yjhong@chromium.org> | 2014-11-07 12:59:54 -0800 |
---|---|---|
committer | chrome-internal-fetch <chrome-internal-fetch@google.com> | 2014-11-12 02:42:03 +0000 |
commit | 20808a86ba340766ddf409282a1bfcba5b541f46 (patch) | |
tree | fa32494a6f96f8d6bf6b360f882b25dabd9c1aeb /cbuildbot/stages/completion_stages.py | |
parent | ef6ccb2b11698c5adcfce60e45270ce80f8e131a (diff) | |
download | chromite-20808a86ba340766ddf409282a1bfcba5b541f46.tar.gz |
Determine fully verified changes based on per-config change information
Now that each CQ slave records relevant/irrelevant CLs in CIDB, the
next step is to utilize the board-specific information to submit and
reject changes:
1. All fully-verified changes should be submitted, even upon slave
failures. This should increase the submission rate.
2. Only CLs relevant to the failed build/slave should be considered
suspects and potentially be rejected. This should improve the
false rejection rate.
This CL implements the submission logic in (1), while the rejection
logic (2) will be implemented in a separate CL.
The new board-specific submission logic will co-exist with the
existing logic until we verify that there is no regression. This means
that even though CQ master runs the new submission logic, it will
still use the old logic to submit CLs.
Side effects of this CL:
- Migrate CommitQueueCompletionStage unittest from mox to mock.
- Move GetOptionForChange and its related functions from validation_pool
to triage_lib.
- Fix patch.GetChangesAsString to print internal changes with a
prefix (*).
BUG=chromium:422639
TEST=`cbuildbot/run_tests`
TEST=`cbuildbot --remote --debug master-paladin`
Change-Id: Ic018175b808a3029727547261a471c6d35d2c1f3
Reviewed-on: https://chromium-review.googlesource.com/228880
Tested-by: Yu-Ju Hong <yjhong@chromium.org>
Reviewed-by: Aviv Keshet <akeshet@chromium.org>
Commit-Queue: Yu-Ju Hong <yjhong@chromium.org>
Diffstat (limited to 'cbuildbot/stages/completion_stages.py')
-rw-r--r-- | cbuildbot/stages/completion_stages.py | 72 |
1 file changed, 66 insertions, 6 deletions
diff --git a/cbuildbot/stages/completion_stages.py b/cbuildbot/stages/completion_stages.py index 225ae3c37..29cb99336 100644 --- a/cbuildbot/stages/completion_stages.py +++ b/cbuildbot/stages/completion_stages.py @@ -21,6 +21,7 @@ from chromite.cbuildbot import validation_pool from chromite.cbuildbot.stages import generic_stages from chromite.cbuildbot.stages import sync_stages from chromite.lib import alerts +from chromite.lib import clactions from chromite.lib import cros_build_lib from chromite.lib import git from chromite.lib import portage_util @@ -568,6 +569,64 @@ class CommitQueueCompletionStage(MasterSlaveSyncCompletionStage): if self._run.config.master: self.CQMasterHandleFailure(failing, inflight, no_stat) + def _GetSlaveMappingAndCLActions(self, changes): + """Query CIDB for slaves and CL actions. + + Args: + changes: A list of GerritPatch instances to examine. + + Returns: + A tuple of (config_map, action_history), where the config_map + is a dictionary mapping build_id to config name for all slaves + in this run, and action_history is a list of all CL actions + associated with |changes|. + """ + # build_id is the master build id for the run. + build_id, db = self._run.GetCIDBHandle() + assert db, 'No database connection to use.' + slave_list = db.GetSlaveStatuses(build_id) + action_history = db.GetActionsForChanges(changes) + + config_map = dict() + # Build the build_id to config_name mapping. Note that if we add the + # "relaunch" feature in cbuildbot, there may be multiple build ids + # for the same slave config. We will have to make sure + # GetSlaveStatuses() returns only the valid slaves (e.g. with + # latest start time). + for d in slave_list: + config_map[d['id']] = d['build_config'] + + return config_map, action_history + + def GetRelevantChangesForSlaves(self, changes, no_stat): + """Compile a set of relevant changes for each slave. + + Args: + changes: A list of GerritPatch instances to examine.
+ no_stat: Set of builder names of slave builders that had status None. + + Returns: + A dictionary mapping a slave config name to a set of relevant changes. + """ + # Retrieve the slaves and clactions from CIDB. + config_map, action_history = self._GetSlaveMappingAndCLActions(changes) + changes_by_build_id = clactions.GetRelevantChangesForBuilds( + changes, action_history, config_map.keys()) + + # Convert index from build_ids to config names. + changes_by_config = dict() + for k, v in changes_by_build_id.iteritems(): + changes_by_config[config_map[k]] = v + + for config in no_stat: + # If a slave is in |no_stat|, it means that the slave never + # finished applying the changes in the sync stage. Hence the CL + # pickup actions for this slave may be + # inaccurate. Conservatively assume all changes are relevant. + changes_by_config[config] = set(changes) + + return changes_by_config + def CQMasterHandleFailure(self, failing, inflight, no_stat): """Handle changes in the validation pool upon build failure or timeout. @@ -581,15 +640,16 @@ class CommitQueueCompletionStage(MasterSlaveSyncCompletionStage): no_stat: Set of builder names of slave builders that had status None. """ messages = self._GetFailedMessages(failing) + self.SendInfraAlertIfNeeded(failing, inflight, no_stat) + # Start with all the changes in the validation pool. changes = self.sync_stage.pool.changes + changes_by_config = self.GetRelevantChangesForSlaves(changes, no_stat) - self.SendInfraAlertIfNeeded(failing, inflight, no_stat) - - if failing and not inflight and not no_stat: - # Even if there was a failure, we can submit the changes that indicate - # that they don't care about this failure. - changes = self.sync_stage.pool.SubmitPartialPool(messages) + # Even if there was a failure, we can submit the changes that indicate + # that they don't care about this failure. 
+ changes = self.sync_stage.pool.SubmitPartialPool( + changes, messages, changes_by_config, failing, inflight, no_stat) tot_sanity = self._ToTSanity( self._run.config.sanity_check_slaves, self._slave_statuses) |