diff options
Diffstat (limited to 'cbuildbot/stages/completion_stages.py')
-rw-r--r-- | cbuildbot/stages/completion_stages.py | 852 |
1 files changed, 852 insertions, 0 deletions
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Module containing the completion stages."""

from __future__ import print_function

from chromite.cbuildbot import chroot_lib
from chromite.cbuildbot import commands
from chromite.cbuildbot import config_lib
from chromite.cbuildbot import failures_lib
from chromite.cbuildbot import results_lib
from chromite.cbuildbot import constants
from chromite.cbuildbot import manifest_version
from chromite.cbuildbot import tree_status
from chromite.cbuildbot.stages import generic_stages
from chromite.cbuildbot.stages import sync_stages
from chromite.lib import clactions
from chromite.lib import cros_logging as logging
from chromite.lib import git
from chromite.lib import patch as cros_patch
from chromite.lib import portage_util


def GetBuilderSuccessMap(builder_run, overall_success):
  """Get the pass/fail status of all builders.

  A builder is marked as passed if all of its steps ran all of the way to
  completion. We determine this by looking at whether all of the steps for
  all of the constituent boards ran to completion.

  In cases where a builder does not have any boards, or has child boards, we
  fall back and instead just look at whether the entire build was successful.

  Args:
    builder_run: The builder run we wish to get the status of.
    overall_success: The overall status of the build.

  Returns:
    A dict, mapping the builder names to whether they succeeded.
  """
  success_map = {}
  for run in [builder_run] + builder_run.GetChildren():
    if run.config.boards and not run.config.child_configs:
      # The builder passed only if every one of its boards recorded the
      # parallel 'success' run attribute.
      success_map[run.config.name] = all(
          run.GetBoardRunAttrs(board).HasParallel('success')
          for board in run.config.boards)
    else:
      # If a builder does not have boards, or if it has child configs, we
      # will just use the overall status instead.
      success_map[run.config.name] = overall_success
  return success_map


def CreateBuildFailureMessage(overlays, builder_name, dashboard_url):
  """Creates a message summarizing the failures.

  Args:
    overlays: The overlays used for the build.
    builder_name: The name of the builder.
    dashboard_url: The URL of the build.

  Returns:
    A failures_lib.BuildFailureMessage object.
  """
  internal = overlays in [constants.PRIVATE_OVERLAYS,
                          constants.BOTH_OVERLAYS]
  details = []
  tracebacks = tuple(results_lib.Results.GetTracebacks())
  for x in tracebacks:
    if isinstance(x.exception, failures_lib.CompoundFailure):
      # We do not want the textual tracebacks included in the
      # stringified CompoundFailure instance because this will be
      # printed on the waterfall.
      ex_str = x.exception.ToSummaryString()
    else:
      ex_str = str(x.exception)
    # Truncate displayed failure reason to 200 characters.
    ex_str = ex_str[:200]
    details.append('The %s stage failed: %s' % (x.failed_stage, ex_str))
  if not details:
    details = ['cbuildbot failed']

  # reason does not include builder name or URL. This is mainly for
  # populating the "failure message" column in the stats sheet.
  reason = ' '.join(details)
  details.append('in %s' % dashboard_url)
  msg = '%s: %s' % (builder_name, ' '.join(details))

  return failures_lib.BuildFailureMessage(msg, tracebacks, internal, reason,
                                          builder_name)


class ManifestVersionedSyncCompletionStage(
    generic_stages.ForgivingBuilderStage):
  """Stage that records board specific results for a unique manifest file."""

  option_name = 'sync'

  def __init__(self, builder_run, sync_stage, success, **kwargs):
    super(ManifestVersionedSyncCompletionStage, self).__init__(
        builder_run, **kwargs)
    self.sync_stage = sync_stage
    self.success = success
    # Message that can be set that will be sent along with the status in
    # UpdateStatus.
    self.message = None

  def GetBuildFailureMessage(self):
    """Returns message summarizing the failures."""
    return CreateBuildFailureMessage(self._run.config.overlays,
                                     self._run.config.name,
                                     self._run.ConstructDashboardURL())

  def PerformStage(self):
    if not self.success:
      self.message = self.GetBuildFailureMessage()

    if not config_lib.IsPFQType(self._run.config.build_type):
      # Update the pass/fail status in the manifest-versions
      # repo. Suite scheduler checks the build status to schedule
      # suites.
      self._run.attrs.manifest_manager.UpdateStatus(
          success_map=GetBuilderSuccessMap(self._run, self.success),
          message=self.message, dashboard_url=self.ConstructDashboardURL())


class ImportantBuilderFailedException(failures_lib.StepFailure):
  """Exception thrown when an important build fails to build."""


class MasterSlaveSyncCompletionStage(ManifestVersionedSyncCompletionStage):
  """Stage that records whether we passed or failed to build/test manifest."""

  def __init__(self, *args, **kwargs):
    super(MasterSlaveSyncCompletionStage, self).__init__(*args, **kwargs)
    self._slave_statuses = {}

  def _GetLocalBuildStatus(self):
    """Return the status for this build as a dictionary."""
    status = manifest_version.BuilderStatus.GetCompletedStatus(self.success)
    status_obj = manifest_version.BuilderStatus(status, self.message)
    return {self._bot_id: status_obj}

  def _FetchSlaveStatuses(self):
    """Fetch and return build status for slaves of this build.

    If this build is not a master then return just the status of this build.

    Returns:
      A dict of build_config name -> BuilderStatus objects, for all important
      slave build configs. Build configs that never started will have a
      BuilderStatus of MISSING.
    """
    # Wait for slaves if we're a master, in production or mock-production.
    # Otherwise just look at our own status.
    slave_statuses = self._GetLocalBuildStatus()
    if not self._run.config.master:
      # The slave build returns its own status.
      logging.warning('The build is not a master.')
    elif self._run.options.mock_slave_status or not self._run.options.debug:
      # The master build.
      builders = self._GetSlaveConfigs()
      builder_names = [b.name for b in builders]
      timeout = None
      build_id, db = self._run.GetCIDBHandle()
      if db:
        timeout = db.GetTimeToDeadline(build_id)
      if timeout is None:
        # Catch-all: This could happen if cidb is not setup, or the deadline
        # query fails.
        timeout = constants.MASTER_BUILD_TIMEOUT_DEFAULT_SECONDS

      if self._run.options.debug:
        # For debug runs, wait for three minutes to ensure most code
        # paths are executed.
        logging.info('Waiting for 3 minutes only for debug run. '
                     'Would have waited for %s seconds.', timeout)
        timeout = 3 * 60

      manager = self._run.attrs.manifest_manager
      if sync_stages.MasterSlaveLKGMSyncStage.sub_manager:
        manager = sync_stages.MasterSlaveLKGMSyncStage.sub_manager
      slave_statuses.update(manager.GetBuildersStatus(
          self._run.attrs.metadata.GetValue('build_id'),
          builder_names,
          timeout=timeout))
    return slave_statuses

  def _HandleStageException(self, exc_info):
    """Decide whether an exception should be treated as fatal."""
    # Besides the master, the completion stages also run on slaves, to report
    # their status back to the master. If the build failed, they throw an
    # exception here. For slave builders, marking this stage 'red' would be
    # redundant, since the build itself would already be red. In this case,
    # report a warning instead.
    # pylint: disable=protected-access
    exc_type = exc_info[0]
    if (issubclass(exc_type, ImportantBuilderFailedException) and
        not self._run.config.master):
      return self._HandleExceptionAsWarning(exc_info)
    else:
      # In all other cases, exceptions should be treated as fatal. To
      # implement this, we bypass ForgivingStage and call
      # generic_stages.BuilderStage._HandleStageException explicitly.
      return generic_stages.BuilderStage._HandleStageException(self, exc_info)

  def HandleSuccess(self):
    """Handle a successful build.

    This function is called whenever the cbuildbot run is successful.
    For the master, this will only be called when all slave builders
    are also successful. This function may be overridden by subclasses.
    """
    # We only promote for the pfq, not chrome pfq.
    # TODO(build): Run this logic in debug mode too.
    if (not self._run.options.debug and
        config_lib.IsPFQType(self._run.config.build_type) and
        self._run.config.master and
        self._run.manifest_branch == 'master' and
        self._run.config.build_type != constants.CHROME_PFQ_TYPE):
      self._run.attrs.manifest_manager.PromoteCandidate()
      if sync_stages.MasterSlaveLKGMSyncStage.sub_manager:
        sync_stages.MasterSlaveLKGMSyncStage.sub_manager.PromoteCandidate()

  def HandleFailure(self, failing, inflight, no_stat):
    """Handle a build failure.

    This function is called whenever the cbuildbot run fails.
    For the master, this will be called when any slave fails or times
    out. This function may be overridden by subclasses.

    Args:
      failing: The names of the failing builders.
      inflight: The names of the builders that are still running.
      no_stat: Set of builder names of slave builders that had status None.
    """
    if failing or inflight or no_stat:
      logging.PrintBuildbotStepWarnings()

    if failing:
      logging.warning('\n'.join([
          'The following builders failed with this manifest:',
          ', '.join(sorted(failing)),
          'Please check the logs of the failing builders for details.']))

    if inflight:
      logging.warning('\n'.join([
          'The following builders took too long to finish:',
          ', '.join(sorted(inflight)),
          'Please check the logs of these builders for details.']))

    if no_stat:
      logging.warning('\n'.join([
          'The following builders did not start or failed prematurely:',
          ', '.join(sorted(no_stat)),
          'Please check the logs of these builders for details.']))

  def PerformStage(self):
    super(MasterSlaveSyncCompletionStage, self).PerformStage()

    # Upload our pass/fail status to Google Storage.
    self._run.attrs.manifest_manager.UploadStatus(
        success=self.success, message=self.message,
        dashboard_url=self.ConstructDashboardURL())

    statuses = self._FetchSlaveStatuses()
    self._slave_statuses = statuses
    # items() (rather than iteritems()) behaves the same on Python 2 and
    # also works on Python 3.
    no_stat = set(builder for builder, status in statuses.items()
                  if status.Missing())
    failing = set(builder for builder, status in statuses.items()
                  if status.Failed())
    inflight = set(builder for builder, status in statuses.items()
                   if status.Inflight())

    # If all the failing or inflight builders were sanity checkers
    # then ignore the failure.
    fatal = self._IsFailureFatal(failing, inflight, no_stat)

    if fatal:
      self._AnnotateFailingBuilders(failing, inflight, no_stat, statuses)
      self.HandleFailure(failing, inflight, no_stat)
      raise ImportantBuilderFailedException()
    else:
      self.HandleSuccess()

  def _IsFailureFatal(self, failing, inflight, no_stat):
    """Returns a boolean indicating whether the build should fail.

    Args:
      failing: Set of builder names of slave builders that failed.
      inflight: Set of builder names of slave builders that are inflight
      no_stat: Set of builder names of slave builders that had status None.

    Returns:
      True if any of the failing or inflight builders are not sanity check
      builders for this master, or if there were any non-sanity-check builders
      with status None.
    """
    sanity_builders = self._run.config.sanity_check_slaves or []
    sanity_builders = set(sanity_builders)
    return not sanity_builders.issuperset(failing | inflight | no_stat)

  def _AnnotateFailingBuilders(self, failing, inflight, no_stat, statuses):
    """Add annotations that link to failing and inflight builders.

    Adds buildbot links to the dashboards of all failing and inflight
    builders that reported a dashboard URL. Builders without a failure
    message are labelled as timed out. Adds step text for builders with
    status None.

    Args:
      failing: Set of builder names of slave builders that failed.
      inflight: Set of builder names of slave builders that are inflight.
      no_stat: Set of builder names of slave builders that had status None.
      statuses: A builder-name->status dictionary, which will provide
                the dashboard_url values for any links.
    """
    builders_to_link = set.union(failing, inflight)
    for builder in builders_to_link:
      if statuses[builder].dashboard_url:
        if statuses[builder].message:
          text = '%s: %s' % (builder, statuses[builder].message.reason)
        else:
          text = '%s: timed out' % builder

        logging.PrintBuildbotLink(text, statuses[builder].dashboard_url)

    for builder in no_stat:
      logging.PrintBuildbotStepText('%s did not start.' % builder)

  def GetSlaveStatuses(self):
    """Returns cached slave status results.

    Cached results are populated during PerformStage, so this function
    should only be called after PerformStage has returned.

    Returns:
      A dictionary from build names to manifest_version.BuilderStatus
      builder status objects.
    """
    return self._slave_statuses

  def _GetFailedMessages(self, failing):
    """Gathers the BuildFailureMessages from the |failing| builders.

    Args:
      failing: Names of the builders that failed.

    Returns:
      A list of BuildFailureMessage or NoneType objects.
    """
    return [self._slave_statuses[x].message for x in failing]

  def _GetBuildersWithNoneMessages(self, failing):
    """Returns a list of failed builders with NoneType failure message.

    Args:
      failing: Names of the builders that failed.

    Returns:
      A list of builder names.
    """
    return [x for x in failing if self._slave_statuses[x].message is None]


class CanaryCompletionStage(MasterSlaveSyncCompletionStage):
  """Collect build slave statuses and handle the failures."""

  def HandleFailure(self, failing, inflight, no_stat):
    """Handle a build failure or timeout in the Canary builders.

    Args:
      failing: Names of the builders that failed.
      inflight: Names of the builders that timed out.
      no_stat: Set of builder names of slave builders that had status None.
    """
    # Print out the status about what builds failed or not.
    MasterSlaveSyncCompletionStage.HandleFailure(
        self, failing, inflight, no_stat)

    if self._run.config.master:
      self.CanaryMasterHandleFailure(failing, inflight, no_stat)

  def SendCanaryFailureAlert(self, failing, inflight, no_stat):
    """Send an alert email to summarize canary failures.

    Args:
      failing: The names of the failing builders.
      inflight: The names of the builders that are still running.
      no_stat: The names of the builders that had status None.
    """
    builder_name = 'Canary Master'
    title = '%s has detected build failures:' % builder_name
    # Builders whose message is None are reported below as "failed with
    # unknown reason"; skip them here so the alert does not contain a
    # literal 'None' paragraph.
    msgs = [str(x) for x in self._GetFailedMessages(failing)
            if x is not None]
    slaves = self._GetBuildersWithNoneMessages(failing)
    msgs += ['%s failed with unknown reason.' % x for x in slaves]
    msgs += ['%s timed out' % x for x in inflight]
    msgs += ['%s did not start' % x for x in no_stat]
    msgs.insert(0, title)
    msgs.append('You can also view the summary of the slave failures from '
                'the %s stage of %s. Click on the failure message to go '
                'to an individual slave\'s build status page: %s' % (
                    self.name, builder_name, self.ConstructDashboardURL()))
    msg = '\n\n'.join(msgs)
    logging.warning(msg)
    extra_fields = {'X-cbuildbot-alert': 'canary-fail-alert'}
    tree_status.SendHealthAlert(self._run, 'Canary builder failures', msg,
                                extra_fields=extra_fields)

  def _ComposeTreeStatusMessage(self, failing, inflight, no_stat):
    """Composes a tree status message.

    Args:
      failing: Names of the builders that failed.
      inflight: Names of the builders that timed out.
      no_stat: Set of builder names of slave builders that had status None.

    Returns:
      A string.
    """
    slave_status_list = [
        ('did not start', list(no_stat)),
        ('timed out', list(inflight)),
        ('failed', list(failing)),]
    # Print maximum 2 slaves for each category to not clutter the
    # message.
    max_num = 2
    messages = []
    for status, slaves in slave_status_list:
      if not slaves:
        continue
      slaves_str = ','.join(slaves[:max_num])
      if len(slaves) <= max_num:
        messages.append('%s %s' % (slaves_str, status))
      else:
        messages.append('%s and %d others %s' % (slaves_str,
                                                 len(slaves) - max_num,
                                                 status))
    return '; '.join(messages)

  def CanaryMasterHandleFailure(self, failing, inflight, no_stat):
    """Handles the failure by sending out an alert email.

    Args:
      failing: Names of the builders that failed.
      inflight: Names of the builders that timed out.
      no_stat: Set of builder names of slave builders that had status None.
    """
    if self._run.manifest_branch == 'master':
      self.SendCanaryFailureAlert(failing, inflight, no_stat)
      tree_status.ThrottleOrCloseTheTree(
          '"Canary master"',
          self._ComposeTreeStatusMessage(failing, inflight, no_stat),
          internal=self._run.config.internal,
          buildnumber=self._run.buildnumber,
          dryrun=self._run.debug)

  def _HandleStageException(self, exc_info):
    """Decide whether an exception should be treated as fatal."""
    # Canary master already updates the tree status for slave
    # failures. There is no need to mark this stage red. For slave
    # builders, the build itself would already be red. In this case,
    # report a warning instead.
    # pylint: disable=protected-access
    exc_type = exc_info[0]
    if issubclass(exc_type, ImportantBuilderFailedException):
      return self._HandleExceptionAsWarning(exc_info)
    else:
      # In all other cases, exceptions should be treated as fatal.
      return super(CanaryCompletionStage, self)._HandleStageException(exc_info)


class CommitQueueCompletionStage(MasterSlaveSyncCompletionStage):
  """Commits or reports errors to CL's that failed to be validated."""

  # These stages are required to have run at least once and to never have
  # failed, on each important slave. Otherwise, we may have incomplete
  # information on which CLs affect which builders, and thus skip all
  # board-aware submission.
  _CRITICAL_STAGES = ('CommitQueueSync',)

  def HandleSuccess(self):
    if self._run.config.master:
      self.sync_stage.pool.SubmitPool(reason=constants.STRATEGY_CQ_SUCCESS)
      # After submitting the pool, update the commit hashes for uprevved
      # ebuilds.
      manifest = git.ManifestCheckout.Cached(self._build_root)
      portage_util.EBuild.UpdateCommitHashesForChanges(
          self.sync_stage.pool.changes, self._build_root, manifest)
      if config_lib.IsPFQType(self._run.config.build_type):
        super(CommitQueueCompletionStage, self).HandleSuccess()

    manager = self._run.attrs.manifest_manager
    version = manager.current_version
    if version:
      chroot_manager = chroot_lib.ChrootManager(self._build_root)
      chroot_manager.SetChrootVersion(version)

  def HandleFailure(self, failing, inflight, no_stat):
    """Handle a build failure or timeout in the Commit Queue.

    This function performs any tasks that need to happen when the Commit Queue
    fails:
      - Abort the HWTests if necessary.
      - Push any CLs that indicate that they don't care about this failure.
      - Determine what CLs to reject.

    See MasterSlaveSyncCompletionStage.HandleFailure.

    Args:
      failing: Names of the builders that failed.
      inflight: Names of the builders that timed out.
      no_stat: Set of builder names of slave builders that had status None.
    """
    # Print out the status about what builds failed or not.
    MasterSlaveSyncCompletionStage.HandleFailure(
        self, failing, inflight, no_stat)

    if self._run.config.master:
      self.CQMasterHandleFailure(failing, inflight, no_stat)

  def _GetSlaveMappingAndCLActions(self, changes):
    """Query CIDB for slaves and CL actions.

    Args:
      changes: A list of GerritPatch instances to examine.

    Returns:
      A tuple of (config_map, action_history), where the config_map
      is a dictionary mapping build_id to config name for all slaves
      in this run plus the master, and action_history is a list of all
      CL actions associated with |changes|.
    """
    # build_id is the master build id for the run.
    build_id, db = self._run.GetCIDBHandle()
    assert db, 'No database connection to use.'
    slave_list = db.GetSlaveStatuses(build_id)
    # TODO(akeshet): We are getting the full action history for all changes that
    # were in this CQ run. It would make more sense to only get the actions from
    # build_ids of this master and its slaves.
    action_history = db.GetActionsForChanges(changes)

    config_map = dict()

    # Build the build_id to config_name mapping. Note that if we add the
    # "relaunch" feature in cbuildbot, there may be multiple build ids
    # for the same slave config. We will have to make sure
    # GetSlaveStatuses() returns only the valid slaves (e.g. with
    # latest start time).
    for d in slave_list:
      config_map[d['id']] = d['build_config']

    # TODO(akeshet): We are giving special treatment to the CQ master, which
    # makes this logic CQ specific. We only use this logic in the CQ anyway at
    # the moment, but may need to reconsider if we need to generalize to other
    # master-slave builds.
    assert self._run.config.name == constants.CQ_MASTER
    config_map[build_id] = constants.CQ_MASTER

    return config_map, action_history

  def GetRelevantChangesForSlaves(self, changes, no_stat):
    """Compile a set of relevant changes for each slave.

    Args:
      changes: A list of GerritPatch instances to examine.
      no_stat: Set of builder names of slave builders that had status None.

    Returns:
      A dictionary mapping a slave config name to a set of relevant changes.
    """
    # Retrieve the slaves and clactions from CIDB.
    config_map, action_history = self._GetSlaveMappingAndCLActions(changes)
    changes_by_build_id = clactions.GetRelevantChangesForBuilds(
        changes, action_history, config_map.keys())

    # Convert index from build_ids to config names.
    changes_by_config = dict()
    for k, v in changes_by_build_id.items():
      changes_by_config[config_map[k]] = v

    for config in no_stat:
      # If a slave is in |no_stat|, it means that the slave never
      # finished applying the changes in the sync stage. Hence the CL
      # pickup actions for this slave may be
      # inaccurate. Conservatively assume all changes are relevant.
      changes_by_config[config] = set(changes)

    return changes_by_config

  def _ShouldSubmitPartialPool(self):
    """Determine whether we should attempt or skip SubmitPartialPool.

    Returns:
      True if all important, non-sanity-check slaves ran and completed all
      critical stages, and hence it is safe to attempt SubmitPartialPool. False
      otherwise.
    """
    # sanity_check_slaves should not block board-aware submission, since they do
    # not actually apply test patches. The config value may be None, so fall
    # back to an empty list as _IsFailureFatal does.
    sanity_check_slaves = set(self._run.config.sanity_check_slaves or [])
    all_slaves = set(x.name for x in self._GetSlaveConfigs())
    all_slaves -= sanity_check_slaves
    assert self._run.config.name not in all_slaves

    # Get slave stages.
    build_id, db = self._run.GetCIDBHandle()
    assert db, 'No database connection to use.'
    slave_stages = db.GetSlaveStages(build_id)

    should_submit = True
    accepted_statuses = (constants.BUILDER_STATUS_PASSED,
                         constants.BUILDER_STATUS_SKIPPED,)

    # Configs that have passed critical stages.
    configs_per_stage = {stage: set() for stage in self._CRITICAL_STAGES}

    for stage in slave_stages:
      if (stage['name'] in self._CRITICAL_STAGES and
          stage['status'] in accepted_statuses):
        configs_per_stage[stage['name']].add(stage['build_config'])

    for stage in self._CRITICAL_STAGES:
      missing_configs = all_slaves - configs_per_stage[stage]
      if missing_configs:
        logging.warning('Config(s) %s did not complete critical stage %s.',
                        ' '.join(missing_configs), stage)
        should_submit = False

    return should_submit

  def CQMasterHandleFailure(self, failing, inflight, no_stat):
    """Handle changes in the validation pool upon build failure or timeout.

    This function determines whether to reject CLs and what CLs to
    reject based on the category of the failures and whether the
    sanity check builder(s) passed.

    Args:
      failing: Names of the builders that failed.
      inflight: Names of the builders that timed out.
      no_stat: Set of builder names of slave builders that had status None.
    """
    messages = self._GetFailedMessages(failing)
    self.SendInfraAlertIfNeeded(failing, inflight, no_stat)

    changes = self.sync_stage.pool.changes

    do_partial_submission = self._ShouldSubmitPartialPool()

    if do_partial_submission:
      changes_by_config = self.GetRelevantChangesForSlaves(changes, no_stat)

      # Even if there was a failure, we can submit the changes that indicate
      # that they don't care about this failure.
      changes = self.sync_stage.pool.SubmitPartialPool(
          changes, messages, changes_by_config, failing, inflight, no_stat,
          reason=constants.STRATEGY_CQ_PARTIAL)
    else:
      logging.warning('Not doing any partial submission, due to critical stage '
                      'failure(s).')
      title = 'CQ encountered a critical failure.'
      msg = ('CQ encountered a critical failure, and hence skipped '
             'board-aware submission. See %s' % self.ConstructDashboardURL())
      tree_status.SendHealthAlert(self._run, title, msg)

    # The config value may be None; treat that as "no sanity check slaves".
    sanity_check_slaves = set(self._run.config.sanity_check_slaves or [])
    tot_sanity = self._ToTSanity(sanity_check_slaves, self._slave_statuses)

    if not tot_sanity:
      # Sanity check slave failure may have been caused by bug(s)
      # in ToT or broken infrastructure. In any of those cases, we
      # should not reject any changes.
      logging.warning('Detected that a sanity-check builder failed. '
                      'Will not reject any changes.')

    # If the tree was not open when we acquired a pool, do not assume that
    # tot was sane.
    if not self.sync_stage.pool.tree_was_open:
      logging.info('The tree was not open when changes were acquired so we are '
                   'attributing failures to the broken tree rather than the '
                   'changes.')
      tot_sanity = False

    if inflight:
      # Some slave(s) timed out due to unknown causes, so only reject infra
      # changes (probably just chromite changes).
      self.sync_stage.pool.HandleValidationTimeout(sanity=tot_sanity,
                                                   changes=changes)
      return

    # Some builder failed, or some builder did not report stats, or
    # the intersection of both. Let HandleValidationFailure decide
    # what changes to reject.
    self.sync_stage.pool.HandleValidationFailure(
        messages, sanity=tot_sanity, changes=changes, no_stat=no_stat)

  def _GetInfraFailMessages(self, failing):
    """Returns a list of messages containing infra failures.

    Args:
      failing: The names of the failing builders.

    Returns:
      A list of BuildFailureMessage objects.
    """
    msgs = self._GetFailedMessages(failing)
    # Filter out None messages because we cannot analyze them.
    return [x for x in msgs if x and
            x.HasFailureType(failures_lib.InfrastructureFailure)]

  def SendInfraAlertIfNeeded(self, failing, inflight, no_stat):
    """Send infra alerts if needed.

    Args:
      failing: The names of the failing builders.
      inflight: The names of the builders that are still running.
      no_stat: The names of the builders that had status None.
    """
    msgs = [str(x) for x in self._GetInfraFailMessages(failing)]
    # Failing to report a non-None message is an infra failure.
    slaves = self._GetBuildersWithNoneMessages(failing)
    msgs += ['%s failed with unknown reason.' % x for x in slaves]
    msgs += ['%s timed out' % x for x in inflight]
    msgs += ['%s did not start' % x for x in no_stat]
    if msgs:
      builder_name = self._run.config.name
      title = '%s has encountered infra failures:' % (builder_name,)
      msgs.insert(0, title)
      msgs.append('See %s' % self.ConstructDashboardURL())
      msg = '\n\n'.join(msgs)
      subject = '%s infra failures' % (builder_name,)
      extra_fields = {'X-cbuildbot-alert': 'cq-infra-alert'}
      tree_status.SendHealthAlert(self._run, subject, msg,
                                  extra_fields=extra_fields)

  @staticmethod
  def _ToTSanity(sanity_check_slaves, slave_statuses):
    """Returns False if any sanity check slaves failed.

    Args:
      sanity_check_slaves: Names of slave builders that are "sanity check"
        builders for the current master.
      slave_statuses: Dict of BuilderStatus objects by builder name keys.

    Returns:
      True if no sanity builders ran and failed.
    """
    sanity_check_slaves = sanity_check_slaves or []
    return not any(x in slave_statuses and slave_statuses[x].Failed()
                   for x in sanity_check_slaves)

  def GetIrrelevantChanges(self, board_metadata):
    """Calculates irrelevant changes.

    Args:
      board_metadata: A dictionary of board specific metadata.

    Returns:
      A set of irrelevant changes to the build.
    """
    if not board_metadata:
      return set()
    # changes irrelevant to all the boards are irrelevant to the build
    changeset_per_board_list = list()
    for v in board_metadata.values():
      changes_dict_list = v.get('irrelevant_changes', None)
      if changes_dict_list:
        changes_set = set(cros_patch.GerritFetchOnlyPatch.FromAttrDict(d) for d
                          in changes_dict_list)
        changeset_per_board_list.append(changes_set)
      else:
        # If any board has no irrelevant changes, the whole build has none
        # either.
        return set()

    return set.intersection(*changeset_per_board_list)

  def PerformStage(self):
    """Run CommitQueueCompletionStage."""
    if (not self._run.config.master and
        not self._run.config.do_not_apply_cq_patches):
      # Slave needs to record what changes are irrelevant to this build.
      board_metadata = self._run.attrs.metadata.GetDict().get('board-metadata')
      irrelevant_changes = self.GetIrrelevantChanges(board_metadata)
      self.sync_stage.pool.RecordIrrelevantChanges(irrelevant_changes)

    super(CommitQueueCompletionStage, self).PerformStage()


class PreCQCompletionStage(generic_stages.BuilderStage):
  """Reports the status of a trybot run to Google Storage and Gerrit."""

  def __init__(self, builder_run, sync_stage, success, **kwargs):
    super(PreCQCompletionStage, self).__init__(builder_run, **kwargs)
    self.sync_stage = sync_stage
    self.success = success

  def GetBuildFailureMessage(self):
    """Returns message summarizing the failures."""
    return CreateBuildFailureMessage(self._run.config.overlays,
                                     self._run.config.name,
                                     self._run.ConstructDashboardURL())

  def PerformStage(self):
    # Update Gerrit and Google Storage with the Pre-CQ status.
    if self.success:
      self.sync_stage.pool.HandlePreCQPerConfigSuccess()
    else:
      message = self.GetBuildFailureMessage()
      self.sync_stage.pool.HandleValidationFailure([message])


class PublishUprevChangesStage(generic_stages.BuilderStage):
  """Makes uprev changes from pfq live for developers."""

  def __init__(self, builder_run, success, **kwargs):
    """Constructor.

    Args:
      builder_run: BuilderRun object.
      success: Boolean indicating whether the build succeeded.
    """
    super(PublishUprevChangesStage, self).__init__(builder_run, **kwargs)
    self.success = success

  def PerformStage(self):
    overlays, push_overlays = self._ExtractOverlays()
    assert push_overlays, 'push_overlays must be set to run this stage'

    # If the build failed, we don't want to push our local changes, because
    # they might include some CLs that failed. Instead, clean up our local
    # changes and do a fresh uprev.
    if not self.success:
      # Clean up our root and sync down the latest changes that were
      # submitted.
      commands.BuildRootGitCleanup(self._build_root)

      # Sync down the latest changes we have submitted.
      if self._run.options.sync:
        next_manifest = self._run.config.manifest
        repo = self.GetRepoRepository()
        repo.Sync(next_manifest)

      # Commit an uprev locally.
      if self._run.options.uprev and self._run.config.uprev:
        commands.UprevPackages(self._build_root, self._boards, overlays)

    # Push the uprev commit.
    commands.UprevPush(self._build_root, push_overlays, self._run.options.debug)