Diffstat (limited to 'cbuildbot/stages/completion_stages.py')
-rw-r--r--  cbuildbot/stages/completion_stages.py  852
1 file changed, 852 insertions(+), 0 deletions(-)
diff --git a/cbuildbot/stages/completion_stages.py b/cbuildbot/stages/completion_stages.py
new file mode 100644
index 000000000..2c6b08d34
--- /dev/null
+++ b/cbuildbot/stages/completion_stages.py
@@ -0,0 +1,852 @@
+# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Module containing the completion stages."""
+
+from __future__ import print_function
+
+from chromite.cbuildbot import chroot_lib
+from chromite.cbuildbot import commands
+from chromite.cbuildbot import config_lib
+from chromite.cbuildbot import failures_lib
+from chromite.cbuildbot import results_lib
+from chromite.cbuildbot import constants
+from chromite.cbuildbot import manifest_version
+from chromite.cbuildbot import tree_status
+from chromite.cbuildbot.stages import generic_stages
+from chromite.cbuildbot.stages import sync_stages
+from chromite.lib import clactions
+from chromite.lib import cros_logging as logging
+from chromite.lib import git
+from chromite.lib import patch as cros_patch
+from chromite.lib import portage_util
+
+
+def GetBuilderSuccessMap(builder_run, overall_success):
+ """Get the pass/fail status of all builders.
+
+ A builder is marked as passed if all of its steps ran all of the way to
+ completion. We determine this by looking at whether all of the steps for
+ all of the constituent boards ran to completion.
+
+  In cases where a builder does not have any boards, or has child configs, we
+ fall back and instead just look at whether the entire build was successful.
+
+ Args:
+ builder_run: The builder run we wish to get the status of.
+ overall_success: The overall status of the build.
+
+ Returns:
+ A dict, mapping the builder names to whether they succeeded.
+ """
+ success_map = {}
+ for run in [builder_run] + builder_run.GetChildren():
+ if run.config.boards and not run.config.child_configs:
+ success_map[run.config.name] = True
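+      # Mark the builder as failed if any of its boards never reported
+      # success.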
+ for board in run.config.boards:
+ board_runattrs = run.GetBoardRunAttrs(board)
+ if not board_runattrs.HasParallel('success'):
+ success_map[run.config.name] = False
+ else:
+ # If a builder does not have boards, or if it has child configs, we
+ # will just use the overall status instead.
+ success_map[run.config.name] = overall_success
+ return success_map
+
+
+def CreateBuildFailureMessage(overlays, builder_name, dashboard_url):
+ """Creates a message summarizing the failures.
+
+ Args:
+ overlays: The overlays used for the build.
+ builder_name: The name of the builder.
+ dashboard_url: The URL of the build.
+
+ Returns:
+ A failures_lib.BuildFailureMessage object.
+ """
+ internal = overlays in [constants.PRIVATE_OVERLAYS,
+ constants.BOTH_OVERLAYS]
+ details = []
+ tracebacks = tuple(results_lib.Results.GetTracebacks())
+ for x in tracebacks:
+ if isinstance(x.exception, failures_lib.CompoundFailure):
+ # We do not want the textual tracebacks included in the
+ # stringified CompoundFailure instance because this will be
+ # printed on the waterfall.
+ ex_str = x.exception.ToSummaryString()
+ else:
+ ex_str = str(x.exception)
+      # Truncate the displayed failure reason to 200 characters.
+ ex_str = ex_str[:200]
+ details.append('The %s stage failed: %s' % (x.failed_stage, ex_str))
+ if not details:
+ details = ['cbuildbot failed']
+
+ # reason does not include builder name or URL. This is mainly for
+ # populating the "failure message" column in the stats sheet.
+ reason = ' '.join(details)
+ details.append('in %s' % dashboard_url)
+ msg = '%s: %s' % (builder_name, ' '.join(details))
+
+ return failures_lib.BuildFailureMessage(msg, tracebacks, internal, reason,
+ builder_name)
+
+
+class ManifestVersionedSyncCompletionStage(
+ generic_stages.ForgivingBuilderStage):
+ """Stage that records board specific results for a unique manifest file."""
+
+ option_name = 'sync'
+
+ def __init__(self, builder_run, sync_stage, success, **kwargs):
+ super(ManifestVersionedSyncCompletionStage, self).__init__(
+ builder_run, **kwargs)
+ self.sync_stage = sync_stage
+ self.success = success
+    # Message that can be set and will be sent along with the status in
+    # UpdateStatus.
+ self.message = None
+
+ def GetBuildFailureMessage(self):
+ """Returns message summarizing the failures."""
+ return CreateBuildFailureMessage(self._run.config.overlays,
+ self._run.config.name,
+ self._run.ConstructDashboardURL())
+
+ def PerformStage(self):
+ if not self.success:
+ self.message = self.GetBuildFailureMessage()
+
+ if not config_lib.IsPFQType(self._run.config.build_type):
+ # Update the pass/fail status in the manifest-versions
+ # repo. Suite scheduler checks the build status to schedule
+ # suites.
+ self._run.attrs.manifest_manager.UpdateStatus(
+ success_map=GetBuilderSuccessMap(self._run, self.success),
+ message=self.message, dashboard_url=self.ConstructDashboardURL())
+
+
+class ImportantBuilderFailedException(failures_lib.StepFailure):
+ """Exception thrown when an important build fails to build."""
+
+
+class MasterSlaveSyncCompletionStage(ManifestVersionedSyncCompletionStage):
+ """Stage that records whether we passed or failed to build/test manifest."""
+
+ def __init__(self, *args, **kwargs):
+ super(MasterSlaveSyncCompletionStage, self).__init__(*args, **kwargs)
+ self._slave_statuses = {}
+
+ def _GetLocalBuildStatus(self):
+ """Return the status for this build as a dictionary."""
+ status = manifest_version.BuilderStatus.GetCompletedStatus(self.success)
+ status_obj = manifest_version.BuilderStatus(status, self.message)
+ return {self._bot_id: status_obj}
+
+ def _FetchSlaveStatuses(self):
+ """Fetch and return build status for slaves of this build.
+
+ If this build is not a master then return just the status of this build.
+
+ Returns:
+ A dict of build_config name -> BuilderStatus objects, for all important
+ slave build configs. Build configs that never started will have a
+ BuilderStatus of MISSING.
+ """
+ # Wait for slaves if we're a master, in production or mock-production.
+ # Otherwise just look at our own status.
+ slave_statuses = self._GetLocalBuildStatus()
+ if not self._run.config.master:
+ # The slave build returns its own status.
+ logging.warning('The build is not a master.')
+ elif self._run.options.mock_slave_status or not self._run.options.debug:
+ # The master build.
+ builders = self._GetSlaveConfigs()
+ builder_names = [b.name for b in builders]
+ timeout = None
+ build_id, db = self._run.GetCIDBHandle()
+ if db:
+ timeout = db.GetTimeToDeadline(build_id)
+ if timeout is None:
+ # Catch-all: This could happen if cidb is not setup, or the deadline
+ # query fails.
+ timeout = constants.MASTER_BUILD_TIMEOUT_DEFAULT_SECONDS
+
+ if self._run.options.debug:
+ # For debug runs, wait for three minutes to ensure most code
+ # paths are executed.
+ logging.info('Waiting for 3 minutes only for debug run. '
+ 'Would have waited for %s seconds.', timeout)
+ timeout = 3 * 60
+
+ manager = self._run.attrs.manifest_manager
+ if sync_stages.MasterSlaveLKGMSyncStage.sub_manager:
+ manager = sync_stages.MasterSlaveLKGMSyncStage.sub_manager
+ slave_statuses.update(manager.GetBuildersStatus(
+ self._run.attrs.metadata.GetValue('build_id'),
+ builder_names,
+ timeout=timeout))
+ return slave_statuses
+
+ def _HandleStageException(self, exc_info):
+ """Decide whether an exception should be treated as fatal."""
+ # Besides the master, the completion stages also run on slaves, to report
+ # their status back to the master. If the build failed, they throw an
+ # exception here. For slave builders, marking this stage 'red' would be
+ # redundant, since the build itself would already be red. In this case,
+ # report a warning instead.
+ # pylint: disable=protected-access
+ exc_type = exc_info[0]
+ if (issubclass(exc_type, ImportantBuilderFailedException) and
+ not self._run.config.master):
+ return self._HandleExceptionAsWarning(exc_info)
+ else:
+ # In all other cases, exceptions should be treated as fatal. To
+ # implement this, we bypass ForgivingStage and call
+ # generic_stages.BuilderStage._HandleStageException explicitly.
+ return generic_stages.BuilderStage._HandleStageException(self, exc_info)
+
+ def HandleSuccess(self):
+ """Handle a successful build.
+
+ This function is called whenever the cbuildbot run is successful.
+ For the master, this will only be called when all slave builders
+ are also successful. This function may be overridden by subclasses.
+ """
+ # We only promote for the pfq, not chrome pfq.
+ # TODO(build): Run this logic in debug mode too.
+ if (not self._run.options.debug and
+ config_lib.IsPFQType(self._run.config.build_type) and
+ self._run.config.master and
+ self._run.manifest_branch == 'master' and
+ self._run.config.build_type != constants.CHROME_PFQ_TYPE):
+ self._run.attrs.manifest_manager.PromoteCandidate()
+ if sync_stages.MasterSlaveLKGMSyncStage.sub_manager:
+ sync_stages.MasterSlaveLKGMSyncStage.sub_manager.PromoteCandidate()
+
+ def HandleFailure(self, failing, inflight, no_stat):
+ """Handle a build failure.
+
+ This function is called whenever the cbuildbot run fails.
+ For the master, this will be called when any slave fails or times
+ out. This function may be overridden by subclasses.
+
+ Args:
+ failing: The names of the failing builders.
+ inflight: The names of the builders that are still running.
+ no_stat: Set of builder names of slave builders that had status None.
+ """
+ if failing or inflight or no_stat:
+ logging.PrintBuildbotStepWarnings()
+
+ if failing:
+ logging.warning('\n'.join([
+ 'The following builders failed with this manifest:',
+ ', '.join(sorted(failing)),
+ 'Please check the logs of the failing builders for details.']))
+
+ if inflight:
+ logging.warning('\n'.join([
+ 'The following builders took too long to finish:',
+ ', '.join(sorted(inflight)),
+ 'Please check the logs of these builders for details.']))
+
+ if no_stat:
+ logging.warning('\n'.join([
+ 'The following builders did not start or failed prematurely:',
+ ', '.join(sorted(no_stat)),
+ 'Please check the logs of these builders for details.']))
+
+ def PerformStage(self):
+ super(MasterSlaveSyncCompletionStage, self).PerformStage()
+
+ # Upload our pass/fail status to Google Storage.
+ self._run.attrs.manifest_manager.UploadStatus(
+ success=self.success, message=self.message,
+ dashboard_url=self.ConstructDashboardURL())
+
+ statuses = self._FetchSlaveStatuses()
+ self._slave_statuses = statuses
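+    # Bucket the slaves by completion state: never started, failed, or
+    # still running.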
+ no_stat = set(builder for builder, status in statuses.iteritems()
+ if status.Missing())
+ failing = set(builder for builder, status in statuses.iteritems()
+ if status.Failed())
+ inflight = set(builder for builder, status in statuses.iteritems()
+ if status.Inflight())
+
+ # If all the failing or inflight builders were sanity checkers
+ # then ignore the failure.
+ fatal = self._IsFailureFatal(failing, inflight, no_stat)
+
+ if fatal:
+ self._AnnotateFailingBuilders(failing, inflight, no_stat, statuses)
+ self.HandleFailure(failing, inflight, no_stat)
+ raise ImportantBuilderFailedException()
+ else:
+ self.HandleSuccess()
+
+ def _IsFailureFatal(self, failing, inflight, no_stat):
+ """Returns a boolean indicating whether the build should fail.
+
+ Args:
+ failing: Set of builder names of slave builders that failed.
+      inflight: Set of builder names of slave builders that are inflight.
+ no_stat: Set of builder names of slave builders that had status None.
+
+ Returns:
+ True if any of the failing or inflight builders are not sanity check
+ builders for this master, or if there were any non-sanity-check builders
+ with status None.
+ """
+ sanity_builders = self._run.config.sanity_check_slaves or []
+ sanity_builders = set(sanity_builders)
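+    # The failure is non-fatal only if every failing, inflight, and no_stat
+    # builder is one of this master's sanity-check slaves.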
+ return not sanity_builders.issuperset(failing | inflight | no_stat)
+
+ def _AnnotateFailingBuilders(self, failing, inflight, no_stat, statuses):
+ """Add annotations that link to either failing or inflight builders.
+
+ Adds buildbot links to failing builder dashboards. If no builders are
+ failing, adds links to inflight builders. Adds step text for builders
+ with status None.
+
+ Args:
+ failing: Set of builder names of slave builders that failed.
+ inflight: Set of builder names of slave builders that are inflight.
+ no_stat: Set of builder names of slave builders that had status None.
+ statuses: A builder-name->status dictionary, which will provide
+ the dashboard_url values for any links.
+ """
+ builders_to_link = set.union(failing, inflight)
+ for builder in builders_to_link:
+ if statuses[builder].dashboard_url:
+ if statuses[builder].message:
+ text = '%s: %s' % (builder, statuses[builder].message.reason)
+ else:
+ text = '%s: timed out' % builder
+
+ logging.PrintBuildbotLink(text, statuses[builder].dashboard_url)
+
+ for builder in no_stat:
+ logging.PrintBuildbotStepText('%s did not start.' % builder)
+
+ def GetSlaveStatuses(self):
+ """Returns cached slave status results.
+
+ Cached results are populated during PerformStage, so this function
+ should only be called after PerformStage has returned.
+
+ Returns:
+ A dictionary from build names to manifest_version.BuilderStatus
+ builder status objects.
+ """
+ return self._slave_statuses
+
+ def _GetFailedMessages(self, failing):
+ """Gathers the BuildFailureMessages from the |failing| builders.
+
+ Args:
+ failing: Names of the builders that failed.
+
+ Returns:
+ A list of BuildFailureMessage or NoneType objects.
+ """
+ return [self._slave_statuses[x].message for x in failing]
+
+ def _GetBuildersWithNoneMessages(self, failing):
+ """Returns a list of failed builders with NoneType failure message.
+
+ Args:
+ failing: Names of the builders that failed.
+
+ Returns:
+ A list of builder names.
+ """
+ return [x for x in failing if self._slave_statuses[x].message is None]
+
+
+class CanaryCompletionStage(MasterSlaveSyncCompletionStage):
+ """Collect build slave statuses and handle the failures."""
+
+ def HandleFailure(self, failing, inflight, no_stat):
+ """Handle a build failure or timeout in the Canary builders.
+
+ Args:
+ failing: Names of the builders that failed.
+ inflight: Names of the builders that timed out.
+ no_stat: Set of builder names of slave builders that had status None.
+ """
+ # Print out the status about what builds failed or not.
+ MasterSlaveSyncCompletionStage.HandleFailure(
+ self, failing, inflight, no_stat)
+
+ if self._run.config.master:
+ self.CanaryMasterHandleFailure(failing, inflight, no_stat)
+
+ def SendCanaryFailureAlert(self, failing, inflight, no_stat):
+ """Send an alert email to summarize canary failures.
+
+ Args:
+ failing: The names of the failing builders.
+ inflight: The names of the builders that are still running.
+ no_stat: The names of the builders that had status None.
+ """
+ builder_name = 'Canary Master'
+ title = '%s has detected build failures:' % builder_name
+ msgs = [str(x) for x in self._GetFailedMessages(failing)]
+ slaves = self._GetBuildersWithNoneMessages(failing)
+ msgs += ['%s failed with unknown reason.' % x for x in slaves]
+ msgs += ['%s timed out' % x for x in inflight]
+ msgs += ['%s did not start' % x for x in no_stat]
+ msgs.insert(0, title)
+ msgs.append('You can also view the summary of the slave failures from '
+ 'the %s stage of %s. Click on the failure message to go '
+ 'to an individual slave\'s build status page: %s' % (
+ self.name, builder_name, self.ConstructDashboardURL()))
+ msg = '\n\n'.join(msgs)
+ logging.warning(msg)
+ extra_fields = {'X-cbuildbot-alert': 'canary-fail-alert'}
+ tree_status.SendHealthAlert(self._run, 'Canary builder failures', msg,
+ extra_fields=extra_fields)
+
+ def _ComposeTreeStatusMessage(self, failing, inflight, no_stat):
+ """Composes a tres status message.
+
+ Args:
+ failing: Names of the builders that failed.
+ inflight: Names of the builders that timed out.
+ no_stat: Set of builder names of slave builders that had status None.
+
+ Returns:
+ A string.
+ """
+ slave_status_list = [
+ ('did not start', list(no_stat)),
+ ('timed out', list(inflight)),
+ ('failed', list(failing)),]
+    # Print at most 2 slaves for each category to keep the message short.
+ max_num = 2
+ messages = []
+ for status, slaves in slave_status_list:
+ if not slaves:
+ continue
+ slaves_str = ','.join(slaves[:max_num])
+ if len(slaves) <= max_num:
+ messages.append('%s %s' % (slaves_str, status))
+ else:
+ messages.append('%s and %d others %s' % (slaves_str,
+ len(slaves) - max_num,
+ status))
+ return '; '.join(messages)
+
+ def CanaryMasterHandleFailure(self, failing, inflight, no_stat):
+ """Handles the failure by sending out an alert email.
+
+ Args:
+ failing: Names of the builders that failed.
+ inflight: Names of the builders that timed out.
+ no_stat: Set of builder names of slave builders that had status None.
+ """
+ if self._run.manifest_branch == 'master':
+ self.SendCanaryFailureAlert(failing, inflight, no_stat)
+ tree_status.ThrottleOrCloseTheTree(
+ '"Canary master"',
+ self._ComposeTreeStatusMessage(failing, inflight, no_stat),
+ internal=self._run.config.internal,
+ buildnumber=self._run.buildnumber,
+ dryrun=self._run.debug)
+
+ def _HandleStageException(self, exc_info):
+ """Decide whether an exception should be treated as fatal."""
+ # Canary master already updates the tree status for slave
+ # failures. There is no need to mark this stage red. For slave
+ # builders, the build itself would already be red. In this case,
+ # report a warning instead.
+ # pylint: disable=protected-access
+ exc_type = exc_info[0]
+ if issubclass(exc_type, ImportantBuilderFailedException):
+ return self._HandleExceptionAsWarning(exc_info)
+ else:
+ # In all other cases, exceptions should be treated as fatal.
+ return super(CanaryCompletionStage, self)._HandleStageException(exc_info)
+
+
+class CommitQueueCompletionStage(MasterSlaveSyncCompletionStage):
+ """Commits or reports errors to CL's that failed to be validated."""
+
+ # These stages are required to have run at least once and to never have
+ # failed, on each important slave. Otherwise, we may have incomplete
+ # information on which CLs affect which builders, and thus skip all
+ # board-aware submission.
+ _CRITICAL_STAGES = ('CommitQueueSync',)
+
+ def HandleSuccess(self):
+ if self._run.config.master:
+ self.sync_stage.pool.SubmitPool(reason=constants.STRATEGY_CQ_SUCCESS)
+ # After submitting the pool, update the commit hashes for uprevved
+ # ebuilds.
+ manifest = git.ManifestCheckout.Cached(self._build_root)
+ portage_util.EBuild.UpdateCommitHashesForChanges(
+ self.sync_stage.pool.changes, self._build_root, manifest)
+ if config_lib.IsPFQType(self._run.config.build_type):
+ super(CommitQueueCompletionStage, self).HandleSuccess()
+
+ manager = self._run.attrs.manifest_manager
+ version = manager.current_version
+ if version:
+ chroot_manager = chroot_lib.ChrootManager(self._build_root)
+ chroot_manager.SetChrootVersion(version)
+
+ def HandleFailure(self, failing, inflight, no_stat):
+ """Handle a build failure or timeout in the Commit Queue.
+
+ This function performs any tasks that need to happen when the Commit Queue
+ fails:
+ - Abort the HWTests if necessary.
+ - Push any CLs that indicate that they don't care about this failure.
+ - Determine what CLs to reject.
+
+ See MasterSlaveSyncCompletionStage.HandleFailure.
+
+ Args:
+ failing: Names of the builders that failed.
+ inflight: Names of the builders that timed out.
+ no_stat: Set of builder names of slave builders that had status None.
+ """
+ # Print out the status about what builds failed or not.
+ MasterSlaveSyncCompletionStage.HandleFailure(
+ self, failing, inflight, no_stat)
+
+ if self._run.config.master:
+ self.CQMasterHandleFailure(failing, inflight, no_stat)
+
+ def _GetSlaveMappingAndCLActions(self, changes):
+ """Query CIDB to for slaves and CL actions.
+
+ Args:
+ changes: A list of GerritPatch instances to examine.
+
+ Returns:
+ A tuple of (config_map, action_history), where the config_map
+ is a dictionary mapping build_id to config name for all slaves
+ in this run plus the master, and action_history is a list of all
+ CL actions associated with |changes|.
+ """
+ # build_id is the master build id for the run.
+ build_id, db = self._run.GetCIDBHandle()
+ assert db, 'No database connection to use.'
+ slave_list = db.GetSlaveStatuses(build_id)
+ # TODO(akeshet): We are getting the full action history for all changes that
+ # were in this CQ run. It would make more sense to only get the actions from
+ # build_ids of this master and its slaves.
+ action_history = db.GetActionsForChanges(changes)
+
+ config_map = dict()
+
+    # Build the build_id to config_name mapping. Note that if we add the
+ # "relaunch" feature in cbuildbot, there may be multiple build ids
+ # for the same slave config. We will have to make sure
+ # GetSlaveStatuses() returns only the valid slaves (e.g. with
+ # latest start time).
+ for d in slave_list:
+ config_map[d['id']] = d['build_config']
+
+ # TODO(akeshet): We are giving special treatment to the CQ master, which
+ # makes this logic CQ specific. We only use this logic in the CQ anyway at
+ # the moment, but may need to reconsider if we need to generalize to other
+ # master-slave builds.
+ assert self._run.config.name == constants.CQ_MASTER
+ config_map[build_id] = constants.CQ_MASTER
+
+ return config_map, action_history
+
+ def GetRelevantChangesForSlaves(self, changes, no_stat):
+ """Compile a set of relevant changes for each slave.
+
+ Args:
+ changes: A list of GerritPatch instances to examine.
+ no_stat: Set of builder names of slave builders that had status None.
+
+ Returns:
+ A dictionary mapping a slave config name to a set of relevant changes.
+ """
+ # Retrieve the slaves and clactions from CIDB.
+ config_map, action_history = self._GetSlaveMappingAndCLActions(changes)
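+    # Determine, per build_id, which of |changes| that build picked up,
+    # based on the recorded CL actions.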
+ changes_by_build_id = clactions.GetRelevantChangesForBuilds(
+ changes, action_history, config_map.keys())
+
+ # Convert index from build_ids to config names.
+ changes_by_config = dict()
+ for k, v in changes_by_build_id.iteritems():
+ changes_by_config[config_map[k]] = v
+
+ for config in no_stat:
+ # If a slave is in |no_stat|, it means that the slave never
+ # finished applying the changes in the sync stage. Hence the CL
+ # pickup actions for this slave may be
+ # inaccurate. Conservatively assume all changes are relevant.
+ changes_by_config[config] = set(changes)
+
+ return changes_by_config
+
+ def _ShouldSubmitPartialPool(self):
+ """Determine whether we should attempt or skip SubmitPartialPool.
+
+ Returns:
+ True if all important, non-sanity-check slaves ran and completed all
+ critical stages, and hence it is safe to attempt SubmitPartialPool. False
+ otherwise.
+ """
+ # sanity_check_slaves should not block board-aware submission, since they do
+ # not actually apply test patches.
+ sanity_check_slaves = set(self._run.config.sanity_check_slaves)
+ all_slaves = set([x.name for x in self._GetSlaveConfigs()])
+ all_slaves -= sanity_check_slaves
+ assert self._run.config.name not in all_slaves
+
+ # Get slave stages.
+ build_id, db = self._run.GetCIDBHandle()
+ assert db, 'No database connection to use.'
+ slave_stages = db.GetSlaveStages(build_id)
+
+ should_submit = True
+ ACCEPTED_STATUSES = (constants.BUILDER_STATUS_PASSED,
+ constants.BUILDER_STATUS_SKIPPED,)
+
+ # Configs that have passed critical stages.
+ configs_per_stage = {stage: set() for stage in self._CRITICAL_STAGES}
+
+ for stage in slave_stages:
+ if (stage['name'] in self._CRITICAL_STAGES and
+ stage['status'] in ACCEPTED_STATUSES):
+ configs_per_stage[stage['name']].add(stage['build_config'])
+
+ for stage in self._CRITICAL_STAGES:
+ missing_configs = all_slaves - configs_per_stage[stage]
+ if missing_configs:
+ logging.warning('Config(s) %s did not complete critical stage %s.',
+ ' '.join(missing_configs), stage)
+ should_submit = False
+
+ return should_submit
+
+ def CQMasterHandleFailure(self, failing, inflight, no_stat):
+ """Handle changes in the validation pool upon build failure or timeout.
+
+ This function determines whether to reject CLs and what CLs to
+ reject based on the category of the failures and whether the
+ sanity check builder(s) passed.
+
+ Args:
+ failing: Names of the builders that failed.
+ inflight: Names of the builders that timed out.
+ no_stat: Set of builder names of slave builders that had status None.
+ """
+ messages = self._GetFailedMessages(failing)
+ self.SendInfraAlertIfNeeded(failing, inflight, no_stat)
+
+ changes = self.sync_stage.pool.changes
+
+ do_partial_submission = self._ShouldSubmitPartialPool()
+
+ if do_partial_submission:
+ changes_by_config = self.GetRelevantChangesForSlaves(changes, no_stat)
+
+ # Even if there was a failure, we can submit the changes that indicate
+ # that they don't care about this failure.
+ changes = self.sync_stage.pool.SubmitPartialPool(
+ changes, messages, changes_by_config, failing, inflight, no_stat,
+ reason=constants.STRATEGY_CQ_PARTIAL)
+ else:
+ logging.warning('Not doing any partial submission, due to critical stage '
+ 'failure(s).')
+ title = 'CQ encountered a critical failure.'
+ msg = ('CQ encountered a critical failure, and hence skipped '
+ 'board-aware submission. See %s' % self.ConstructDashboardURL())
+ tree_status.SendHealthAlert(self._run, title, msg)
+
+ sanity_check_slaves = set(self._run.config.sanity_check_slaves)
+ tot_sanity = self._ToTSanity(sanity_check_slaves, self._slave_statuses)
+
+ if not tot_sanity:
+ # Sanity check slave failure may have been caused by bug(s)
+ # in ToT or broken infrastructure. In any of those cases, we
+ # should not reject any changes.
+ logging.warning('Detected that a sanity-check builder failed. '
+ 'Will not reject any changes.')
+
+ # If the tree was not open when we acquired a pool, do not assume that
+ # tot was sane.
+ if not self.sync_stage.pool.tree_was_open:
+ logging.info('The tree was not open when changes were acquired so we are '
+ 'attributing failures to the broken tree rather than the '
+ 'changes.')
+ tot_sanity = False
+
+ if inflight:
+ # Some slave(s) timed out due to unknown causes, so only reject infra
+ # changes (probably just chromite changes).
+ self.sync_stage.pool.HandleValidationTimeout(sanity=tot_sanity,
+ changes=changes)
+ return
+
+ # Some builder failed, or some builder did not report stats, or
+ # the intersection of both. Let HandleValidationFailure decide
+ # what changes to reject.
+ self.sync_stage.pool.HandleValidationFailure(
+ messages, sanity=tot_sanity, changes=changes, no_stat=no_stat)
+
+ def _GetInfraFailMessages(self, failing):
+ """Returns a list of messages containing infra failures.
+
+ Args:
+ failing: The names of the failing builders.
+
+ Returns:
+ A list of BuildFailureMessage objects.
+ """
+ msgs = self._GetFailedMessages(failing)
+ # Filter out None messages because we cannot analyze them.
+ return [x for x in msgs if x and
+ x.HasFailureType(failures_lib.InfrastructureFailure)]
+
+ def SendInfraAlertIfNeeded(self, failing, inflight, no_stat):
+ """Send infra alerts if needed.
+
+ Args:
+ failing: The names of the failing builders.
+ inflight: The names of the builders that are still running.
+ no_stat: The names of the builders that had status None.
+ """
+ msgs = [str(x) for x in self._GetInfraFailMessages(failing)]
+    # Failing to report a non-None message is an infra failure.
+ slaves = self._GetBuildersWithNoneMessages(failing)
+ msgs += ['%s failed with unknown reason.' % x for x in slaves]
+ msgs += ['%s timed out' % x for x in inflight]
+ msgs += ['%s did not start' % x for x in no_stat]
+ if msgs:
+ builder_name = self._run.config.name
+ title = '%s has encountered infra failures:' % (builder_name,)
+ msgs.insert(0, title)
+ msgs.append('See %s' % self.ConstructDashboardURL())
+ msg = '\n\n'.join(msgs)
+ subject = '%s infra failures' % (builder_name,)
+ extra_fields = {'X-cbuildbot-alert': 'cq-infra-alert'}
+ tree_status.SendHealthAlert(self._run, subject, msg,
+ extra_fields=extra_fields)
+
+ @staticmethod
+ def _ToTSanity(sanity_check_slaves, slave_statuses):
+ """Returns False if any sanity check slaves failed.
+
+ Args:
+ sanity_check_slaves: Names of slave builders that are "sanity check"
+ builders for the current master.
+ slave_statuses: Dict of BuilderStatus objects by builder name keys.
+
+ Returns:
+ True if no sanity builders ran and failed.
+ """
+ sanity_check_slaves = sanity_check_slaves or []
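+    # ToT is presumed sane unless a sanity-check slave both reported a status
+    # and failed.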
+ return not any([x in slave_statuses and slave_statuses[x].Failed() for
+ x in sanity_check_slaves])
+
+ def GetIrrelevantChanges(self, board_metadata):
+ """Calculates irrelevant changes.
+
+ Args:
+ board_metadata: A dictionary of board specific metadata.
+
+ Returns:
+ A set of irrelevant changes to the build.
+ """
+ if not board_metadata:
+ return set()
+ # changes irrelevant to all the boards are irrelevant to the build
+ changeset_per_board_list = list()
+ for v in board_metadata.values():
+ changes_dict_list = v.get('irrelevant_changes', None)
+ if changes_dict_list:
+ changes_set = set(cros_patch.GerritFetchOnlyPatch.FromAttrDict(d) for d
+ in changes_dict_list)
+ changeset_per_board_list.append(changes_set)
+ else:
+        # If any board has no irrelevant changes, the build has none either.
+ return set()
+
+ return set.intersection(*changeset_per_board_list)
+
+ def PerformStage(self):
+ """Run CommitQueueCompletionStage."""
+ if (not self._run.config.master and
+ not self._run.config.do_not_apply_cq_patches):
+      # The slave needs to record which changes are irrelevant to this build.
+ board_metadata = self._run.attrs.metadata.GetDict().get('board-metadata')
+ irrelevant_changes = self.GetIrrelevantChanges(board_metadata)
+ self.sync_stage.pool.RecordIrrelevantChanges(irrelevant_changes)
+
+ super(CommitQueueCompletionStage, self).PerformStage()
+
+
+class PreCQCompletionStage(generic_stages.BuilderStage):
+ """Reports the status of a trybot run to Google Storage and Gerrit."""
+
+ def __init__(self, builder_run, sync_stage, success, **kwargs):
+ super(PreCQCompletionStage, self).__init__(builder_run, **kwargs)
+ self.sync_stage = sync_stage
+ self.success = success
+
+ def GetBuildFailureMessage(self):
+ """Returns message summarizing the failures."""
+ return CreateBuildFailureMessage(self._run.config.overlays,
+ self._run.config.name,
+ self._run.ConstructDashboardURL())
+
+ def PerformStage(self):
+ # Update Gerrit and Google Storage with the Pre-CQ status.
+ if self.success:
+ self.sync_stage.pool.HandlePreCQPerConfigSuccess()
+ else:
+ message = self.GetBuildFailureMessage()
+ self.sync_stage.pool.HandleValidationFailure([message])
+
+
+class PublishUprevChangesStage(generic_stages.BuilderStage):
+ """Makes uprev changes from pfq live for developers."""
+
+ def __init__(self, builder_run, success, **kwargs):
+ """Constructor.
+
+ Args:
+ builder_run: BuilderRun object.
+ success: Boolean indicating whether the build succeeded.
+ """
+ super(PublishUprevChangesStage, self).__init__(builder_run, **kwargs)
+ self.success = success
+
+ def PerformStage(self):
+ overlays, push_overlays = self._ExtractOverlays()
+ assert push_overlays, 'push_overlays must be set to run this stage'
+
+ # If the build failed, we don't want to push our local changes, because
+ # they might include some CLs that failed. Instead, clean up our local
+ # changes and do a fresh uprev.
+ if not self.success:
+ # Clean up our root and sync down the latest changes that were
+ # submitted.
+ commands.BuildRootGitCleanup(self._build_root)
+
+ # Sync down the latest changes we have submitted.
+ if self._run.options.sync:
+ next_manifest = self._run.config.manifest
+ repo = self.GetRepoRepository()
+ repo.Sync(next_manifest)
+
+ # Commit an uprev locally.
+ if self._run.options.uprev and self._run.config.uprev:
+ commands.UprevPackages(self._build_root, self._boards, overlays)
+
+ # Push the uprev commit.
+ commands.UprevPush(self._build_root, push_overlays, self._run.options.debug)