summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYu-Ju Hong <yjhong@chromium.org>2014-11-07 12:59:54 -0800
committerchrome-internal-fetch <chrome-internal-fetch@google.com>2014-11-12 02:42:03 +0000
commit20808a86ba340766ddf409282a1bfcba5b541f46 (patch)
treefa32494a6f96f8d6bf6b360f882b25dabd9c1aeb
parentef6ccb2b11698c5adcfce60e45270ce80f8e131a (diff)
downloadchromite-20808a86ba340766ddf409282a1bfcba5b541f46.tar.gz
Determine fully verified changes based on per-config change information
Now that each CQ slave records relevant/irrelevant CLs in CIDB, the next step is to utilize the board-specific information to submit and reject changes: 1. All fully-verified changes should be submitted, even upon slave failures. This should increase the submission rate. 2. Only CLs relevant to the failed build/slave should be considered suspects and potentially be rejected. This should improve the false rejection rate. This CL implements the submission logic in (1), while the rejection logic (2) will be implemented in a separate CL. The new board-specific submission logic will co-exist with the existing logic until we verify that there is no regression. This means that even though CQ master runs the new submission logic, it will still use the old logic to submit CLs. Side effects of this CL: - Migrate CommitQueueCompletionStage unittest from mox to mock. - Move GetOptionForChange and its related functions from validation_pool to triage_lib. - Fix patch.GetChangesAsString to print internal changes with a prefix (*). BUG=chromium:422639 TEST=`cbuildbot/run_tests` TEST=`cbuildbot --remote --debug master-paladin` Change-Id: Ic018175b808a3029727547261a471c6d35d2c1f3 Reviewed-on: https://chromium-review.googlesource.com/228880 Tested-by: Yu-Ju Hong <yjhong@chromium.org> Reviewed-by: Aviv Keshet <akeshet@chromium.org> Commit-Queue: Yu-Ju Hong <yjhong@chromium.org>
-rw-r--r--cbuildbot/failures_lib.py11
-rw-r--r--cbuildbot/stages/completion_stages.py72
-rwxr-xr-xcbuildbot/stages/completion_stages_unittest.py92
-rw-r--r--cbuildbot/stages/sync_stages.py23
-rwxr-xr-xcbuildbot/stages/sync_stages_unittest.py11
-rw-r--r--cbuildbot/triage_lib.py176
-rwxr-xr-xcbuildbot/triage_lib_unittest.py63
-rw-r--r--cbuildbot/validation_pool.py180
-rwxr-xr-xcbuildbot/validation_pool_unittest.py10
-rw-r--r--lib/clactions.py24
-rw-r--r--lib/patch.py8
11 files changed, 511 insertions, 159 deletions
diff --git a/cbuildbot/failures_lib.py b/cbuildbot/failures_lib.py
index 697ea9863..917ec3c24 100644
--- a/cbuildbot/failures_lib.py
+++ b/cbuildbot/failures_lib.py
@@ -295,6 +295,17 @@ class BuildFailureMessage(object):
def __str__(self):
return self.message
+ def GetFailingStages(self):
+ """Get a list of the failing stage prefixes from tracebacks.
+
+ Returns:
+ A list of failing stage prefixes if there are tracebacks; None otherwise.
+ """
+ failing_stages = None
+ if self.tracebacks:
+ failing_stages = set(x.failed_prefix for x in self.tracebacks)
+ return failing_stages
+
def MatchesFailureType(self, cls):
"""Check if all of the tracebacks match the specified failure type."""
for tb in self.tracebacks:
diff --git a/cbuildbot/stages/completion_stages.py b/cbuildbot/stages/completion_stages.py
index 225ae3c37..29cb99336 100644
--- a/cbuildbot/stages/completion_stages.py
+++ b/cbuildbot/stages/completion_stages.py
@@ -21,6 +21,7 @@ from chromite.cbuildbot import validation_pool
from chromite.cbuildbot.stages import generic_stages
from chromite.cbuildbot.stages import sync_stages
from chromite.lib import alerts
+from chromite.lib import clactions
from chromite.lib import cros_build_lib
from chromite.lib import git
from chromite.lib import portage_util
@@ -568,6 +569,64 @@ class CommitQueueCompletionStage(MasterSlaveSyncCompletionStage):
if self._run.config.master:
self.CQMasterHandleFailure(failing, inflight, no_stat)
+ def _GetSlaveMappingAndCLActions(self, changes):
+ """Query CIDB to for slaves and CL actions.
+
+ Args:
+ changes: A list of GerritPatch instances to examine.
+
+ Returns:
+ A tuple of (config_map, action_history), where the config_map
+ is a dictionary mapping build_id to config name for all slaves
+ in this run, and action_history is a list of all CL actions
+ associated with |changes|.
+ """
+ # build_id is the master build id for the run.
+ build_id, db = self._run.GetCIDBHandle()
+ assert db, 'No database connection to use.'
+ slave_list = db.GetSlaveStatuses(build_id)
+ action_history = db.GetActionsForChanges(changes)
+
+ config_map = dict()
+ # Build the build_id to config_name mapping. Note that if add the
+ # "relaunch" feature in cbuildbot, there may be multiple build ids
+ # for the same slave config. We will have to make sure
+ # GetSlaveStatuses() returns only the valid slaves (e.g. with
+ # latest start time).
+ for d in slave_list:
+ config_map[d['id']] = d['build_config']
+
+ return config_map, action_history
+
+ def GetRelevantChangesForSlaves(self, changes, no_stat):
+ """Compile a set of relevant changes for each slave.
+
+ Args:
+ changes: A list of GerritPatch instances to examine.
+ no_stat: Set of builder names of slave builders that had status None.
+
+ Returns:
+ A dictionary mapping a slave config name to a set of relevant changes.
+ """
+ # Retrieve the slaves and clactions from CIDB.
+ config_map, action_history = self._GetSlaveMappingAndCLActions(changes)
+ changes_by_build_id = clactions.GetRelevantChangesForBuilds(
+ changes, action_history, config_map.keys())
+
+ # Convert index from build_ids to config names.
+ changes_by_config = dict()
+ for k, v in changes_by_build_id.iteritems():
+ changes_by_config[config_map[k]] = v
+
+ for config in no_stat:
+ # If a slave is in |no_stat|, it means that the slave never
+ # finished applying the changes in the sync stage. Hence the CL
+ # pickup actions for this slave may be
+ # inaccurate. Conservatively assume all changes are relevant.
+ changes_by_config[config] = set(changes)
+
+ return changes_by_config
+
def CQMasterHandleFailure(self, failing, inflight, no_stat):
"""Handle changes in the validation pool upon build failure or timeout.
@@ -581,15 +640,16 @@ class CommitQueueCompletionStage(MasterSlaveSyncCompletionStage):
no_stat: Set of builder names of slave builders that had status None.
"""
messages = self._GetFailedMessages(failing)
+ self.SendInfraAlertIfNeeded(failing, inflight, no_stat)
+
# Start with all the changes in the validation pool.
changes = self.sync_stage.pool.changes
+ changes_by_config = self.GetRelevantChangesForSlaves(changes, no_stat)
- self.SendInfraAlertIfNeeded(failing, inflight, no_stat)
-
- if failing and not inflight and not no_stat:
- # Even if there was a failure, we can submit the changes that indicate
- # that they don't care about this failure.
- changes = self.sync_stage.pool.SubmitPartialPool(messages)
+ # Even if there was a failure, we can submit the changes that indicate
+ # that they don't care about this failure.
+ changes = self.sync_stage.pool.SubmitPartialPool(
+ changes, messages, changes_by_config, failing, inflight, no_stat)
tot_sanity = self._ToTSanity(
self._run.config.sanity_check_slaves, self._slave_statuses)
diff --git a/cbuildbot/stages/completion_stages_unittest.py b/cbuildbot/stages/completion_stages_unittest.py
index 8d8c9dc5f..4baa39ac4 100755
--- a/cbuildbot/stages/completion_stages_unittest.py
+++ b/cbuildbot/stages/completion_stages_unittest.py
@@ -17,13 +17,16 @@ from chromite.cbuildbot import constants
from chromite.cbuildbot import failures_lib
from chromite.cbuildbot import manifest_version
from chromite.cbuildbot import results_lib
-from chromite.cbuildbot import validation_pool
from chromite.cbuildbot.stages import completion_stages
from chromite.cbuildbot.stages import generic_stages_unittest
from chromite.cbuildbot.stages import sync_stages_unittest
from chromite.cbuildbot.stages import sync_stages
from chromite.lib import alerts
+from chromite.lib import clactions
from chromite.lib import cros_test_lib
+from chromite.lib import patch_unittest
+
+import mock
# pylint: disable=R0901,W0212
@@ -291,7 +294,7 @@ class CanaryCompletionStageTest(
class CommitQueueCompletionStageTest(
- generic_stages_unittest.AbstractStageTest):
+ generic_stages_unittest.AbstractStageTest, patch_unittest.MockPatchBase):
"""Tests how CQ master handles changes in CommitQueueCompletionStage."""
BOT_ID = 'master-paladin'
@@ -307,30 +310,32 @@ class CommitQueueCompletionStageTest(
self.partial_submit_changes = ['C', 'D']
self.other_changes = ['A', 'B']
self.changes = self.other_changes + self.partial_submit_changes
+ self.tot_sanity_mock = self.PatchObject(
+ completion_stages.CommitQueueCompletionStage, '_ToTSanity')
- self.mox.StubOutWithMock(completion_stages.MasterSlaveSyncCompletionStage,
- 'HandleFailure')
- self.mox.StubOutWithMock(completion_stages.CommitQueueCompletionStage,
- '_ToTSanity')
- self.mox.StubOutWithMock(alerts, '_SendEmailHelper')
-
+ self.alert_email_mock = self.PatchObject(alerts, '_SendEmailHelper')
+ self.PatchObject(completion_stages.MasterSlaveSyncCompletionStage,
+ 'HandleFailure')
self.PatchObject(completion_stages.CommitQueueCompletionStage,
'_GetFailedMessages')
self.PatchObject(completion_stages.CommitQueueCompletionStage,
'ShouldDisableAlerts', return_value=False)
+ self.PatchObject(completion_stages.CommitQueueCompletionStage,
+ '_GetSlaveMappingAndCLActions',
+ return_value=(dict(), []))
+ self.PatchObject(clactions, 'GetRelevantChangesForBuilds')
def ConstructStage(self):
"""Returns a CommitQueueCompletionStage object."""
+ # pylint: disable=W0201
sync_stage = sync_stages.CommitQueueSyncStage(self._run)
- sync_stage.pool = mox.MockObject(validation_pool.ValidationPool)
+ sync_stage.pool = mock.MagicMock()
sync_stage.pool.changes = self.changes
- self.mox.StubOutWithMock(sync_stage.pool,
- 'HandleValidationFailure')
- self.mox.StubOutWithMock(sync_stage.pool,
- 'HandleValidationTimeout')
- self.mox.StubOutWithMock(sync_stage.pool,
- 'SubmitPartialPool')
+ sync_stage.pool.handle_failure_mock = self.PatchObject(
+ sync_stage.pool, 'HandleValidationFailure')
+ sync_stage.pool.handle_timeout_mock = self.PatchObject(
+ sync_stage.pool, 'HandleValidationTimeout')
return completion_stages.CommitQueueCompletionStage(
self._run, sync_stage, success=True)
@@ -349,28 +354,27 @@ class CommitQueueCompletionStageTest(
stage = self.ConstructStage()
if submit_partial:
- stage.sync_stage.pool.SubmitPartialPool(
- mox.IgnoreArg()).AndReturn(self.other_changes)
+ self.PatchObject(stage.sync_stage.pool, 'SubmitPartialPool',
+ return_value=self.other_changes)
+ else:
+ self.PatchObject(stage.sync_stage.pool, 'SubmitPartialPool',
+ return_value=self.changes)
- if alert:
- alerts._SendEmailHelper(mox.IgnoreArg(), mox.IgnoreArg(), mox.IgnoreArg(),
- mox.IgnoreArg())
+ stage.CQMasterHandleFailure(failing, inflight, [])
- completion_stages.CommitQueueCompletionStage._ToTSanity(
- mox.IgnoreArg(), mox.IgnoreArg())
+ # Verify the calls.
+ self.tot_sanity_mock.assert_called_once_with(mock.ANY, mock.ANY)
+ if alert:
+ self.alert_email_mock.called_once_with(
+ mock.ANY, mock.ANY, mock.ANY, mock.ANY)
if handle_failure:
- stage.sync_stage.pool.HandleValidationFailure(
- mox.IgnoreArg(), no_stat=[], sanity=mox.IgnoreArg(),
- changes=self.other_changes)
+ stage.sync_stage.pool.handle_failure_mock.assert_called_once_with(
+ mock.ANY, no_stat=[], sanity=mock.ANY, changes=self.other_changes)
if handle_timeout:
- stage.sync_stage.pool.HandleValidationTimeout(
- sanity=mox.IgnoreArg(), changes=self.changes)
-
- self.mox.ReplayAll()
- stage.CQMasterHandleFailure(failing, inflight, [])
- self.mox.VerifyAll()
+ stage.sync_stage.pool.handle_timeout_mock.assert_called_once_with(
+ sanity=mock.ANY, changes=self.changes)
def testNoInflightBuildersNoInfraFail(self):
"""Test case where there are no inflight builders and no infra failures."""
@@ -421,6 +425,32 @@ class CommitQueueCompletionStageTest(
self.VerifyStage(failing, inflight, handle_failure=False,
handle_timeout=True, alert=True)
+ def testGetRelevantChangesForSlave(self):
+ """Tests the logic of GetRelevantChangesForSlaves()."""
+ change_set1 = set(self.GetPatches(how_many=2))
+ change_set2 = set(self.GetPatches(how_many=3))
+ changes = set.union(change_set1, change_set2)
+ no_stat = ['no_stat-paladin']
+ config_map = {'123': 'foo-paladin',
+ '124': 'bar-paladin',
+ '125': 'no_stat-paladin'}
+ changes_by_build_id = {'123': change_set1,
+ '124': change_set2}
+ # If a slave did not report status (no_stat), assume all changes
+ # are relevant.
+ expected = {'foo-paladin': change_set1,
+ 'bar-paladin': change_set2,
+ 'no_stat-paladin': changes}
+ self.PatchObject(completion_stages.CommitQueueCompletionStage,
+ '_GetSlaveMappingAndCLActions',
+ return_value=(config_map, []))
+ self.PatchObject(clactions, 'GetRelevantChangesForBuilds',
+ return_value=changes_by_build_id)
+
+ stage = self.ConstructStage()
+ results = stage.GetRelevantChangesForSlaves(changes, no_stat)
+ self.assertEqual(results, expected)
+
class PublishUprevChangesStageTest(generic_stages_unittest.AbstractStageTest):
"""Tests for the PublishUprevChanges stage."""
diff --git a/cbuildbot/stages/sync_stages.py b/cbuildbot/stages/sync_stages.py
index 6ce80e90d..172717917 100644
--- a/cbuildbot/stages/sync_stages.py
+++ b/cbuildbot/stages/sync_stages.py
@@ -24,6 +24,7 @@ from chromite.cbuildbot import lkgm_manager
from chromite.cbuildbot import manifest_version
from chromite.cbuildbot import repository
from chromite.cbuildbot import tree_status
+from chromite.cbuildbot import triage_lib
from chromite.cbuildbot import trybot_patch_pool
from chromite.cbuildbot import validation_pool
from chromite.cbuildbot.stages import generic_stages
@@ -961,7 +962,7 @@ class PreCQLauncherStage(SyncStage):
"""
configs_to_test = constants.PRE_CQ_DEFAULT_CONFIGS
try:
- result = validation_pool.GetOptionForChange(
+ result = triage_lib.GetOptionForChange(
self._build_root, change, 'GENERAL', 'pre-cq-configs')
if (result and result.split() and
all(c in cbuildbot_config.config for c in result.split())):
@@ -1012,7 +1013,7 @@ class PreCQLauncherStage(SyncStage):
"""
result = None
try:
- result = validation_pool.GetOptionForChange(
+ result = triage_lib.GetOptionForChange(
self._build_root, change, 'GENERAL', 'submit-in-pre-cq')
except ConfigParser.Error:
cros_build_lib.Error('%s has malformed config file', change,
@@ -1079,22 +1080,24 @@ class PreCQLauncherStage(SyncStage):
# there's no need to launch a trybot run.
plan = set(plan)
if not plan.issubset(screened_changes):
- logging.info('CLs waiting to be screened: %r',
- ' '.join(map(str, plan.difference(screened_changes))))
+ logging.info('CLs waiting to be screened: %s',
+ cros_patch.GetChangesAsString(
+ plan.difference(screened_changes)))
elif plan.issubset(verified):
- logging.info('CLs already verified: %r', ' '.join(map(str, plan)))
+ logging.info('CLs already verified: %s',
+ cros_patch.GetChangesAsString(plan))
elif plan.intersection(busy):
- logging.info('CLs currently being verified: %r',
- ' '.join(map(str, plan.intersection(busy))))
+ logging.info('CLs currently being verified: %s',
+ cros_patch.GetChangesAsString(plan.intersection(busy)))
if plan.difference(busy):
logging.info('CLs waiting on verification of dependencies: %r',
- ' '.join(map(str, plan.difference(busy))))
+ cros_patch.GetChangesAsString(plan.difference(busy)))
# TODO(akeshet): Consider using a database time rather than gerrit
# approval time and local clock for launch delay.
elif any(x.approval_timestamp + self.LAUNCH_DELAY * 60 > time.time()
for x in plan):
- logging.info('CLs waiting on launch delay: %r',
- ' '.join(map(str, plan)))
+ logging.info('CLs waiting on launch delay: %s',
+ cros_patch.GetChangesAsString(plan))
else:
pending_configs = clactions.GetPreCQConfigsToTest(plan, progress_map)
for config in pending_configs:
diff --git a/cbuildbot/stages/sync_stages_unittest.py b/cbuildbot/stages/sync_stages_unittest.py
index cd31c1285..09f6d111a 100755
--- a/cbuildbot/stages/sync_stages_unittest.py
+++ b/cbuildbot/stages/sync_stages_unittest.py
@@ -24,6 +24,7 @@ from chromite.cbuildbot import manifest_version
from chromite.cbuildbot import manifest_version_unittest
from chromite.cbuildbot import repository
from chromite.cbuildbot import tree_status
+from chromite.cbuildbot import triage_lib
from chromite.cbuildbot import validation_pool
from chromite.cbuildbot.stages import sync_stages
from chromite.cbuildbot.stages import generic_stages_unittest
@@ -426,35 +427,35 @@ class PreCQLauncherStageTest(MasterCQSyncTestCase):
change = MockPatch()
configs_to_test = cbuildbot_config.config.keys()[:5]
return_string = ' '.join(configs_to_test)
- self.PatchObject(validation_pool, 'GetOptionForChange',
+ self.PatchObject(triage_lib, 'GetOptionForChange',
return_value=return_string)
self.assertEqual(self.sync_stage.VerificationsForChange(change),
configs_to_test)
def testVerificationsForChangeMalformedConfigFile(self):
change = MockPatch()
- self.PatchObject(validation_pool, 'GetOptionForChange',
+ self.PatchObject(triage_lib, 'GetOptionForChange',
side_effect=ConfigParser.Error)
self.assertEqual(self.sync_stage.VerificationsForChange(change),
constants.PRE_CQ_DEFAULT_CONFIGS)
def testVerificationsForChangeNoSuchConfig(self):
change = MockPatch()
- self.PatchObject(validation_pool, 'GetOptionForChange',
+ self.PatchObject(triage_lib, 'GetOptionForChange',
return_value='this_config_does_not_exist')
self.assertEqual(self.sync_stage.VerificationsForChange(change),
constants.PRE_CQ_DEFAULT_CONFIGS)
def testVerificationsForChangeEmptyField(self):
change = MockPatch()
- self.PatchObject(validation_pool, 'GetOptionForChange',
+ self.PatchObject(triage_lib, 'GetOptionForChange',
return_value=' ')
self.assertEqual(self.sync_stage.VerificationsForChange(change),
constants.PRE_CQ_DEFAULT_CONFIGS)
def testVerificationsForChangeNoneField(self):
change = MockPatch()
- self.PatchObject(validation_pool, 'GetOptionForChange',
+ self.PatchObject(triage_lib, 'GetOptionForChange',
return_value=None)
self.assertEqual(self.sync_stage.VerificationsForChange(change),
constants.PRE_CQ_DEFAULT_CONFIGS)
diff --git a/cbuildbot/triage_lib.py b/cbuildbot/triage_lib.py
index 477713748..09d281bfa 100644
--- a/cbuildbot/triage_lib.py
+++ b/cbuildbot/triage_lib.py
@@ -6,6 +6,7 @@
from __future__ import print_function
+import ConfigParser
import logging
import os
import pprint
@@ -13,6 +14,7 @@ import pprint
from chromite.cbuildbot import cbuildbot_config
from chromite.cbuildbot import failures_lib
from chromite.cbuildbot import constants
+from chromite.lib import cros_build_lib
from chromite.lib import git
from chromite.lib import patch as cros_patch
from chromite.lib import portage_util
@@ -68,8 +70,79 @@ def GetAffectedOverlays(change, manifest, all_overlays):
return subdirs
+def _GetOptionFromConfigFile(config_path, section, option):
+ """Get |option| from |section| in |config_path|.
+
+ Args:
+ config_path: Filename to look at.
+ section: Section header name.
+ option: Option name.
+
+ Returns:
+ The value of the option.
+ """
+ parser = ConfigParser.SafeConfigParser()
+ parser.read(config_path)
+ if parser.has_option(section, option):
+ return parser.get(section, option)
+
+
+def GetOptionForChange(build_root, change, section, option):
+ """Get |option| from |section| in the config file for |change|.
+
+ Args:
+ build_root: The root of the checkout.
+ change: Change to examine, as a PatchQuery object.
+ section: Section header name.
+ option: Option name.
+
+ Returns:
+ The value of the option.
+ """
+ manifest = git.ManifestCheckout.Cached(build_root)
+ checkout = change.GetCheckout(manifest)
+ if checkout:
+ dirname = checkout.GetPath(absolute=True)
+ config_path = os.path.join(dirname, 'COMMIT-QUEUE.ini')
+ return _GetOptionFromConfigFile(config_path, section, option)
+
+
+def GetStagesToIgnoreForChange(build_root, change):
+ """Get a list of stages that the CQ should ignore for a given |change|.
+
+ The list of stage name prefixes to ignore for each project is specified in a
+ config file inside the project, named COMMIT-QUEUE.ini. The file would look
+ like this:
+
+ [GENERAL]
+ ignored-stages: HWTest VMTest
+
+ The CQ will submit changes to the given project even if the listed stages
+ failed. These strings are stage name prefixes, meaning that "HWTest" would
+ match any HWTest stage (e.g. "HWTest [bvt]" or "HWTest [foo]")
+
+ Args:
+ build_root: The root of the checkout.
+ change: Change to examine, as a PatchQuery object.
+
+ Returns:
+ A list of stages to ignore for the given |change|.
+ """
+ result = None
+ try:
+ result = GetOptionForChange(build_root, change, 'GENERAL',
+ 'ignored-stages')
+ except ConfigParser.Error:
+ cros_build_lib.Error('%s has malformed config file', change, exc_info=True)
+ return result.split() if result else []
+
+
class CategorizeChanges(object):
- """A collection of methods to help categorize GerritPatch changes."""
+ """A collection of methods to help categorize GerritPatch changes.
+
+ This class is mainly used on a build slave to categorize changes
+ applied in the build.
+ """
@classmethod
def ClassifyOverlayChanges(cls, changes, config, build_root, manifest):
@@ -273,8 +346,9 @@ class CalculateSuspects(object):
return suspects
@classmethod
- def FilterChromiteChanges(cls, changes):
- """Returns a list of chromite changes in |changes|."""
+ def FilterChangesForInfraFail(cls, changes):
+ """Returns a list of changes responsible for infra failures."""
+ # Chromite changes could cause infra failures.
return [x for x in changes if x.project == constants.CHROMITE_PROJECT]
@classmethod
@@ -369,7 +443,7 @@ class CalculateSuspects(object):
logging.warning(
'Detected that the build failed due to non-lab infrastructure '
'issue(s). Will only reject chromite changes')
- return set(cls.FilterChromiteChanges(changes))
+ return set(cls.FilterChangesForInfraFail(changes))
if all(message and message.IsPackageBuildFailure()
for message in messages):
@@ -458,3 +532,97 @@ class CalculateSuspects(object):
if overlays is None or overlays.issubset(responsible_overlays):
candidates.append(change)
return candidates
+
+ @classmethod
+ def _CanIgnoreFailures(cls, messages, change, build_root):
+ """Examine whether we can ignore the failures for |change|.
+
+ Examine the |failed_messages| to see if we are allowed to ignore
+ the failures base on the per-repository settings in
+ COMMIT_QUEUE.ini.
+
+ Args:
+ messages: A list of BuildFailureMessage or NoneType objects from
+ the failed slaves.
+ change: A GerritPatch instance to examine.
+ build_root: Build root directory.
+
+ Returns:
+ True if we can ignore the failures; False otherwise.
+ """
+ # Some repositories may opt to ignore certain stage failures.
+ failing_stages = set()
+ if any(x.GetFailingStages() is None for x in messages):
+ # If there are no tracebacks, that means that the builder
+ # did not report its status properly. We don't know what
+ # stages failed and cannot safely ignore any stage.
+ return False
+
+ for message in messages:
+ failing_stages.update(message.GetFailingStages())
+ ignored_stages = GetStagesToIgnoreForChange(build_root, change)
+ if ignored_stages and failing_stages.issubset(ignored_stages):
+ return True
+
+ return False
+
+ @classmethod
+ def GetFullyVerfiedChanges(cls, changes, changes_by_config, no_stat, failing,
+ inflight, messages, build_root):
+
+ """Examines build failures and returns a set of fully verified changes.
+
+ A change is fully verified if all the build configs relevant to
+ this change have either passed or failed in a manner that can be
+ safely ignored by the change.
+
+ Args:
+ changes: A list of GerritPatch instances to examine.
+ changes_by_config: A dictionary of relevant changes indexed by the
+ config names.
+ failing: Names of the builders that failed.
+ inflight: Names of the builders that timed out.
+ no_stat: Set of builder names of slave builders that had status None.
+ messages: A list of BuildFailureMessage or NoneType objects from
+ the failed slaves.
+ build_root: Build root directory.
+
+ Returns:
+ A set of fully verified changes.
+ """
+ changes = set(changes)
+ no_stat = set(no_stat)
+ failing = set(failing)
+ inflight = set(inflight)
+ # Verify that every change is at least tested on one slave. If
+ # not, somethings has gone seriously wrong. No changes should be
+ # marked fully verified.
+ all_tested_changes = set()
+ for tested_changes in changes_by_config.itervalues():
+ all_tested_changes.update(tested_changes)
+ untested_changes = changes - all_tested_changes
+ if untested_changes:
+ logging.warning('Some changes were not tested on any slave: %s',
+ cros_patch.GetChangesAsString(untested_changes))
+ return set()
+
+ fully_verified = set()
+ for change in all_tested_changes:
+ # If all relevant configs associated with a change passed, the
+ # change is fully verified.
+ relevant_configs = [k for k, v in changes_by_config.iteritems() if
+ change in v]
+ if any(x in set.union(no_stat, inflight) for x in relevant_configs):
+ continue
+
+ failed_configs = [x for x in relevant_configs if x in failing]
+ if not failed_configs:
+ fully_verified.add(change)
+ else:
+ # Examine the failures and see if we can safely ignore them
+ # for the change.
+ failed_messages = [x for x in messages if x.builder in failed_configs]
+ if cls._CanIgnoreFailures(failed_messages, change, build_root):
+ fully_verified.add(change)
+
+ return fully_verified
diff --git a/cbuildbot/triage_lib_unittest.py b/cbuildbot/triage_lib_unittest.py
index e6c75a6ce..5db0d2840 100755
--- a/cbuildbot/triage_lib_unittest.py
+++ b/cbuildbot/triage_lib_unittest.py
@@ -237,5 +237,68 @@ class TestFindSuspects(patch_unittest.MockPatchBase):
self.assertEquals(candidates, changes[1:])
+class TestGetFullyVerifiedChanges(patch_unittest.MockPatchBase):
+ """Tests GetFullyVerifiedChanges() and related functions."""
+
+ def setUp(self):
+ self.build_root = '/foo/build/root'
+ self.changes = self.GetPatches(how_many=5)
+ self.PatchObject(triage_lib.CalculateSuspects, '_CanIgnoreFailures')
+
+ def testChangesNoAllTested(self):
+ """Tests that we assume no changes are fully verified in this case."""
+ no_stat = failing = messages = []
+ inflight = ['foo-paladin']
+ changes_by_config = {'foo-paladin': []}
+
+ verified = triage_lib.CalculateSuspects.GetFullyVerfiedChanges(
+ self.changes, changes_by_config, no_stat, failing, inflight,
+ messages, self.build_root)
+
+ self.assertEquals(verified, set())
+
+ def testChangesNotVerified(self):
+ """Tests that changes are not verified if builds failed prematurely."""
+ failing = messages = []
+ inflight = ['foo-paladin']
+ no_stat = ['puppy-paladin']
+ changes_by_config = {'foo-paladin': set(self.changes[:2]),
+ 'bar-paladin': set(self.changes),
+ 'puppy-paladin': set(self.changes[-2:])}
+
+ verified = triage_lib.CalculateSuspects.GetFullyVerfiedChanges(
+ self.changes, changes_by_config, no_stat, failing, inflight,
+ messages, self.build_root)
+ self.assertEquals(verified, set(self.changes[2:-2]))
+
+ def testChangesNotVerifiedOnFailures(self):
+ """Tests that changes are not verified if failures cannot be ignored."""
+ messages = no_stat = inflight = []
+ failing = ['cub-paladin']
+ changes_by_config = {'bar-paladin': set(self.changes),
+ 'cub-paladin': set(self.changes[:2])}
+
+ self.PatchObject(triage_lib.CalculateSuspects, '_CanIgnoreFailures',
+ return_value=False)
+ verified = triage_lib.CalculateSuspects.GetFullyVerfiedChanges(
+ self.changes, changes_by_config, no_stat, failing, inflight,
+ messages, self.build_root)
+ self.assertEquals(verified, set(self.changes[2:]))
+
+ def testChangesVerifiedWhenFailuresCanBeIgnored(self):
+ """Tests that changes are verified if failures can be ignored."""
+ messages = no_stat = inflight = []
+ failing = ['cub-paladin']
+ changes_by_config = {'bar-paladin': set(self.changes),
+ 'cub-paladin': set(self.changes[:2])}
+
+ self.PatchObject(triage_lib.CalculateSuspects, '_CanIgnoreFailures',
+ return_value=True)
+ verified = triage_lib.CalculateSuspects.GetFullyVerfiedChanges(
+ self.changes, changes_by_config, no_stat, failing, inflight,
+ messages, self.build_root)
+ self.assertEquals(verified, set(self.changes))
+
+
if __name__ == '__main__':
cros_test_lib.main()
diff --git a/cbuildbot/validation_pool.py b/cbuildbot/validation_pool.py
index cafc9f664..ad5e87abc 100644
--- a/cbuildbot/validation_pool.py
+++ b/cbuildbot/validation_pool.py
@@ -10,7 +10,6 @@ ready for the commit queue to try.
from __future__ import print_function
-import ConfigParser
import contextlib
import cPickle
import functools
@@ -206,74 +205,6 @@ def _RunCommand(cmd, dryrun):
cros_build_lib.Error('Command failed', exc_info=True)
-def _GetOptionFromConfigFile(config_path, section, option):
- """Get |option| from |section| in |config_path|.
-
- Args:
- config_path: Filename to look at.
- section: Section header name.
- option: Option name.
-
- Returns:
- The value of the option.
- """
- parser = ConfigParser.SafeConfigParser()
- parser.read(config_path)
- if parser.has_option(section, option):
- return parser.get(section, option)
-
-
-# TODO(akeshet): Move this function to somewhere more sensible.
-def GetOptionForChange(build_root, change, section, option):
- """Get |option| from |section| in the config file for |change|.
-
- Args:
- build_root: The root of the checkout.
- change: Change to examine, as a PatchQuery object.
- section: Section header name.
- option: Option name.
-
- Returns:
- The value of the option.
- """
- manifest = git.ManifestCheckout.Cached(build_root)
- checkout = change.GetCheckout(manifest)
- if checkout:
- dirname = checkout.GetPath(absolute=True)
- config_path = os.path.join(dirname, 'COMMIT-QUEUE.ini')
- return _GetOptionFromConfigFile(config_path, section, option)
-
-
-def GetStagesToIgnoreForChange(build_root, change):
- """Get a list of stages that the CQ should ignore for a given |change|.
-
- The list of stage name prefixes to ignore for each project is specified in a
- config file inside the project, named COMMIT-QUEUE.ini. The file would look
- like this:
-
- [GENERAL]
- ignored-stages: HWTest VMTest
-
- The CQ will submit changes to the given project even if the listed stages
- failed. These strings are stage name prefixes, meaning that "HWTest" would
- match any HWTest stage (e.g. "HWTest [bvt]" or "HWTest [foo]")
-
- Args:
- build_root: The root of the checkout.
- change: Change to examine, as a PatchQuery object.
-
- Returns:
- A list of stages to ignore for the given |change|.
- """
- result = None
- try:
- result = GetOptionForChange(build_root, change, 'GENERAL',
- 'ignored-stages')
- except ConfigParser.Error:
- cros_build_lib.Error('%s has malformed config file', change, exc_info=True)
- return result.split() if result else []
-
-
class GerritHelperNotAvailable(gerrit.GerritException):
"""Exception thrown when a specific helper is requested but unavailable."""
@@ -2081,52 +2012,111 @@ class ValidationPool(object):
if self.changes_that_failed_to_apply_earlier:
self._HandleApplyFailure(self.changes_that_failed_to_apply_earlier)
- def SubmitPartialPool(self, messages):
- """If the build failed, push any CLs that don't care about the failure.
+ @classmethod
+ def _GetShouldSubmitChanges(cls, changes, messages, build_root, no_stat):
+ """Examine failure |messages| to filter a set of |changes| to submit.
- In this function we calculate what CLs are definitely innocent and submit
- those CLs.
+ This function has been factored out from SubmitPartialPool() so
+ that we can switch to using _GetFullyVerifiedChanges() in the near
+ future. There is no functional change at all.
- Each project can specify a list of stages it does not care about in its
- COMMIT-QUEUE.ini file. Changes to that project will be submitted even if
- those stages fail.
+ TODO(yjhong): Deprecate this function once crbug.com/422639 is completed.
Args:
+ changes: A list of GerritPatch instances to examine.
messages: A list of BuildFailureMessage or NoneType objects from
the failed slaves.
+ build_root: Build root directory.
+ no_stat: Set of builder names of slave builders that had status None.
Returns:
- A list of the rejected changes.
+ A set of changes to submit.
"""
+ if no_stat:
+ # If a builder did not return any status, we do not have
+ # sufficient information about the failure. Don't submit any
+ # changes.
+ return set()
+
# Create a list of the failing stage prefixes.
- tracebacks = set()
+ failing_stages = set()
for message in messages:
- # If there are no tracebacks, that means that the builder did not
- # report its status properly. Don't submit anything.
- if not message or not message.tracebacks:
- return self.changes
- tracebacks.update(message.tracebacks)
- failing_stages = set(traceback.failed_prefix for traceback in tracebacks)
+ stages = None if not message else message.GetFailingStages()
+ if not stages:
+ # If there are no tracebacks, that means that the builder did not
+ # report its status properly. Don't submit anything.
+ return set()
+ failing_stages.update(stages)
# For each CL, look at whether it cares about the failures. Based on this,
# filter out CLs that don't care about the failure.
rejection_candidates = []
- for change in self.changes:
- ignored_stages = GetStagesToIgnoreForChange(self.build_root, change)
+ for change in changes:
+ ignored_stages = triage_lib.GetStagesToIgnoreForChange(build_root, change)
if not ignored_stages or not failing_stages.issubset(ignored_stages):
rejection_candidates.append(change)
- # TODO(yjhong): Deprecate the logic here once crbug.com/422639 is completed.
# Filter out innocent internal overlay changes from our list of candidates.
rejected = triage_lib.CalculateSuspects.FilterOutInnocentChanges(
- self.build_root, rejection_candidates, messages)
+ build_root, rejection_candidates, messages)
+
+ should_submit = set(changes) - set(rejected)
+ return should_submit
+
+ def SubmitPartialPool(self, changes, messages, changes_by_config, failing,
+ inflight, no_stat):
+ """If the build failed, push any CLs that don't care about the failure.
- # Actually submit the accepted changes.
- accepted = list(set(self.changes) - set(rejected))
- self.SubmitChanges(accepted)
+ In this function we calculate what CLs are definitely innocent and submit
+ those CLs.
+
+ Each project can specify a list of stages it does not care about in its
+ COMMIT-QUEUE.ini file. Changes to that project will be submitted even if
+ those stages fail.
+
+ Args:
+ changes: A list of GerritPatch instances to examine.
+ messages: A list of BuildFailureMessage or NoneType objects from
+ the failed slaves.
+ changes_by_config: A dictionary of relevant changes indexed by the
+ config names.
+ failing: Names of the builders that failed.
+ inflight: Names of the builders that timed out.
+ no_stat: Set of builder names of slave builders that had status None.
+
+ Returns:
+ A set of the non-submittable changes.
+ """
+ should_submit = self._GetShouldSubmitChanges(
+ changes, messages, self.build_root, no_stat)
+ fully_verified = triage_lib.CalculateSuspects.GetFullyVerfiedChanges(
+ changes, changes_by_config, failing, inflight, no_stat,
+ messages, self.build_root)
+
+ logging.info('The following changes will be submitted: %s',
+ cros_patch.GetChangesAsString(should_submit))
+ logging.info('The following changes would be submitted if we switch to '
+ 'using board-specific triaging logic: %s',
+ cros_patch.GetChangesAsString(fully_verified))
+
+ # TODO(yjhong): send the stats to either GS or CIDB.
+ if should_submit - fully_verified:
+ logging.warning('Board-specific triaging logic would not have '
+ 'submitted changes: %s',
+ cros_patch.GetChangesAsString(
+ should_submit - fully_verified))
+ if fully_verified - should_submit:
+ logging.info('Board-specific triaging logic would have '
+ 'submitted changes: %s',
+ cros_patch.GetChangesAsString(
+ fully_verified - should_submit))
+
+ # TODO(yjhong): Replace should_submit with fully_verified once we
+ # confirm there will be no regression (crbug.com/422639).
+ self.SubmitChanges(should_submit)
# Return the list of rejected changes.
- return rejected
+ return set(changes) - set(should_submit)
def _HandleApplyFailure(self, failures):
"""Handles changes that were not able to be applied cleanly.
@@ -2520,11 +2510,9 @@ class ValidationPool(object):
Args:
changes: A set of irrelevant changes to record.
"""
- formatted_changes = ['CL:%s' % cros_patch.AddPrefix(x, x.gerrit_number)
- for x in changes]
- if formatted_changes:
+ if changes:
logging.info('The following changes are irrelevant to this build: %s',
- ' '.join(formatted_changes))
+ cros_patch.GetChangesAsString(changes))
else:
logging.info('All changes are considered relevant to this build.')
diff --git a/cbuildbot/validation_pool_unittest.py b/cbuildbot/validation_pool_unittest.py
index f93cb754d..dde09baca 100755
--- a/cbuildbot/validation_pool_unittest.py
+++ b/cbuildbot/validation_pool_unittest.py
@@ -148,7 +148,7 @@ class IgnoredStagesTest(patch_unittest.MockPatchBase):
"""Tests for functions that calculate what stages to ignore."""
def GetOption(self, path, section='GENERAL', option='ignored-stages'):
- return validation_pool._GetOptionFromConfigFile(path, section, option)
+ return triage_lib._GetOptionFromConfigFile(path, section, option)
def testBadConfigFile(self):
"""Test if we can handle an incorrectly formatted config file."""
@@ -1337,7 +1337,6 @@ class MockValidationPool(partial_mock.PartialMock):
RemoveCommitReady = None
-
class BaseSubmitPoolTestCase(MoxBase):
"""Test full ability to submit and reject CL pools."""
@@ -1378,14 +1377,15 @@ class BaseSubmitPoolTestCase(MoxBase):
"""
# self.ignores maps changes to a list of stages to ignore. Use it.
self.PatchObject(
- validation_pool, 'GetStagesToIgnoreForChange',
+ triage_lib, 'GetStagesToIgnoreForChange',
side_effect=lambda _, change: self.ignores[change])
# Set up our pool and submit the patches.
pool = self.SetUpPatchPool(**kwargs)
messages = self.GetMessages()
if messages:
- actually_rejected = sorted(pool.SubmitPartialPool(self.GetMessages()))
+ actually_rejected = sorted(pool.SubmitPartialPool(
+ pool.changes, self.GetMessages(), dict(), [], [], []))
else:
actually_rejected = pool.SubmitChanges(self.patches)
@@ -1533,6 +1533,7 @@ class SubmitPartialPoolTest(BaseSubmitPoolTestCase):
self.messages = [
triage_lib_unittest.TestFindSuspects.GetFailedMessage(
[Exception()], stage=self.stage_name)]
+ self.PatchObject(triage_lib.CalculateSuspects, 'GetFullyVerfiedChanges')
def GetMessages(self):
"""Return a list of failure messages containing a single traceback.
@@ -1542,7 +1543,6 @@ class SubmitPartialPoolTest(BaseSubmitPoolTestCase):
"""
return self.messages
-
def IgnoreFailures(self, patch):
"""Set us up to ignore failures for the specified |patch|."""
self.ignores[patch] = [self.stage_name]
diff --git a/lib/clactions.py b/lib/clactions.py
index cbabe37b9..9c12bc5ac 100644
--- a/lib/clactions.py
+++ b/lib/clactions.py
@@ -374,3 +374,27 @@ def GetPreCQConfigsToTest(changes, progress_map):
return configs_to_test
+def GetRelevantChangesForBuilds(changes, action_history, build_ids):
+ """Get relevant changes for |build_ids| by examing CL actions.
+
+ Args:
+ changes: A list of GerritPatch instances to examine.
+ action_history: A list of CLAction instances.
+ build_ids: A list of build id to examine.
+
+ Returns:
+ A dictionary mapping a build id to a set of changes.
+ """
+ changes_map = dict()
+ relevant_actions = [x for x in action_history if x.build_id in build_ids]
+ for change in changes:
+ actions = ActionsForPatch(change, relevant_actions)
+ pickups = set([x.build_id for x in actions if
+ x.action == constants.CL_ACTION_PICKED_UP])
+ discards = set([x.build_id for x in actions if
+ x.action == constants.CL_ACTION_IRRELEVANT_TO_SLAVE])
+ relevant_build_ids = pickups - discards
+ for build_id in relevant_build_ids:
+ changes_map.setdefault(build_id, set()).add(change)
+
+ return changes_map
diff --git a/lib/patch.py b/lib/patch.py
index 0fd52fb11..61cb5b94d 100644
--- a/lib/patch.py
+++ b/lib/patch.py
@@ -1624,10 +1624,14 @@ def PrepareRemotePatches(patches):
return patch_info
-def GetChangesAsString(changes):
+def GetChangesAsString(changes, prefix='CL:', delimiter=' '):
"""Gets a human readable string listing |changes| in CL:1234 form.
Args:
changes: A list of GerritPatch objects.
+ prefix: Prefix to use. Defaults to 'CL:'
+ delimiter: Delimiter to use. Defaults to a space.
"""
- return ', '.join(sorted('CL:%s' % x.gerrit_number_str for x in changes))
+ formatted_changes = ['%s%s' % (prefix, AddPrefix(x, x.gerrit_number))
+ for x in changes]
+ return delimiter.join(sorted(formatted_changes))