# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Module that helps to triage Commit Queue failures."""

from __future__ import print_function

import logging
import os
import pprint

from chromite.cbuildbot import cbuildbot_config
from chromite.cbuildbot import failures_lib
from chromite.cbuildbot import constants
from chromite.lib import git
from chromite.lib import patch as cros_patch
from chromite.lib import portage_util


def GetRelevantOverlaysForConfig(config, build_root):
  """Returns the overlays relevant to |config|.

  Args:
    config: A cbuildbot config object; its |boards| attribute is iterated.
    build_root: Path to the build root.

  Returns:
    A set of overlays: the union of the overlays found for each board in
    |config.boards|.
  """
  relevant_overlays = set()
  for board in config.boards:
    overlays = portage_util.FindOverlays(
        constants.BOTH_OVERLAYS, board, build_root)
    relevant_overlays.update(overlays)

  return relevant_overlays


def GetAffectedOverlays(change, manifest, all_overlays):
  """Get the set of overlays affected by a given change.

  Args:
    change: The GerritPatch instance to look at.
    manifest: A ManifestCheckout instance representing our build directory.
    all_overlays: The set of all valid overlays.

  Returns:
    The set of overlays affected by the specified |change|. If the change
    affected something other than an overlay (or no checkout could be found
    for it), return None.
  """
  checkout = change.GetCheckout(manifest, strict=False)
  if not checkout:
    return None

  git_repo = checkout.GetPath(absolute=True)

  # The whole git repo is an overlay. Return it.
  # Example: src/private-overlays/overlay-x86-zgb-private
  if git_repo in all_overlays:
    return set([git_repo])

  # Get the set of immediate subdirs affected by the change.
  # Example: src/overlays/overlay-x86-zgb
  subdirs = set(os.path.join(git_repo, path.split(os.path.sep)[0])
                for path in change.GetDiffStatus(git_repo))

  # If all of the subdirs are overlays, return them.
  if subdirs.issubset(all_overlays):
    return subdirs

  # At least one touched subdir is not an overlay.
  return None


class CategorizeChanges(object):
  """A collection of methods to help categorize GerritPatch changes."""

  @classmethod
  def ClassifyOverlayChanges(cls, changes, config, build_root, manifest):
    """Classifies overlay changes in |changes|.

    Args:
      changes: The list or set of GerritPatch instances.
      config: The cbuildbot config.
      build_root: Path to the build root.
      manifest: A ManifestCheckout instance representing our build directory.

    Returns:
      A (overlay_changes, irrelevant_overlay_changes) tuple; overlay_changes
      is a subset of |changes| that have modified one or more overlays, and
      irrelevant_overlay_changes is a subset of overlay_changes which are
      irrelevant to |config|.
    """
    visible_overlays = set(portage_util.FindOverlays(config.overlays, None,
                                                     build_root))
    # The overlays relevant to this build.
    relevant_overlays = GetRelevantOverlaysForConfig(config, build_root)

    overlay_changes = set()
    irrelevant_overlay_changes = set()
    for change in changes:
      affected_overlays = GetAffectedOverlays(change, manifest,
                                              visible_overlays)
      if affected_overlays is None:
        # The change is not (purely) an overlay change; leave it untriaged.
        continue

      # The change modifies an overlay.
      overlay_changes.add(change)
      if affected_overlays.isdisjoint(relevant_overlays):
        # None of the touched overlays is relevant to this build.
        irrelevant_overlay_changes.add(change)

    return overlay_changes, irrelevant_overlay_changes

  @classmethod
  def ClassifyWorkOnChanges(cls, changes, config, build_root, manifest,
                            packages_under_test):
    """Classifies WorkOn package changes in |changes|.

    Args:
      changes: The list or set of GerritPatch instances.
      config: The cbuildbot config.
      build_root: Path to the build root.
      manifest: A ManifestCheckout instance representing our build directory.
      packages_under_test: A list of packages names included in the build.
        (e.g. ['chromeos-base/chromite-0.0.1-r1258']).

    Returns:
      A (workon_changes, irrelevant_workon_changes) tuple; workon_changes
      is a subset of |changes| that have modified workon packages, and
      irrelevant_workon_changes is a subset of workon_changes which are
      irrelevant to |config|.
    """
    workon_changes = set()
    irrelevant_workon_changes = set()

    # Strip the version of the package in packages_under_test.
    cpv_list = [portage_util.SplitCPV(x) for x in packages_under_test]
    cp_under_test = ['%s/%s' % (x.category, x.package) for x in cpv_list]
    # The list is kept for logging; the set gives O(1) membership tests in
    # the loop below.
    cp_under_test_set = set(cp_under_test)

    workon_dict = portage_util.BuildFullWorkonPackageDictionary(
        build_root, config.overlays, manifest)

    pp = pprint.PrettyPrinter(indent=2)
    logging.info('(project, branch) to workon package mapping:\n %s',
                 pp.pformat(workon_dict))
    logging.info('packages under test\n: %s', pp.pformat(cp_under_test))

    for change in changes:
      packages = workon_dict.get((change.project, change.tracking_branch))
      if not packages:
        # The CL does not map to any workon package.
        continue

      # The CL modifies a workon package.
      workon_changes.add(change)
      if cp_under_test_set.isdisjoint(packages):
        # None of the packages the CL maps to were under test.
        irrelevant_workon_changes.add(change)

    return workon_changes, irrelevant_workon_changes

  @classmethod
  def _FilterProjectsInManifestByGroup(cls, manifest, groups):
    """Filters projects in |manifest| by |groups|.

    Args:
      manifest: A git.Manifest instance.
      groups: A list of groups to filter.

    Returns:
      A set of (project, branch) tuples where each tuple is associated
      with at least one group in |groups|.
    """
    results = set()
    # items() rather than the Python-2-only iteritems() so this works on
    # either major Python version.
    for project, checkout_list in manifest.checkouts_by_name.items():
      for checkout in checkout_list:
        if any(x in checkout['groups'] for x in groups):
          branch = git.StripRefs(checkout['tracking_branch'])
          results.add((project, branch))

    return results

  @classmethod
  def GetChangesToBuildTools(cls, changes, manifest):
    """Returns the changes associated with buildtools projects.

    Args:
      changes: The list or set of GerritPatch instances.
      manifest: A git.Manifest instance.

    Returns:
      A subset of |changes| to projects of "buildtools" group.
    """
    buildtool_set = cls._FilterProjectsInManifestByGroup(
        manifest, ['buildtools'])
    return set(x for x in changes
               if (x.project, x.tracking_branch) in buildtool_set)

  @classmethod
  def GetIrrelevantChanges(cls, changes, config, build_root, manifest,
                           packages_under_test):
    """Determine changes irrelevant to build |config|.

    This method determines a set of changes that are irrelevant to the
    build |config|. The general rule of thumb is that if we are unsure
    whether a change is relevant, consider it relevant.

    Args:
      changes: The list or set of GerritPatch instances.
      config: The cbuildbot config.
      build_root: Path to the build root.
      manifest: A ManifestCheckout instance representing our build directory.
      packages_under_test: A list of packages that were tested in this build.
        If None, workon package classification is skipped.

    Returns:
      A subset of |changes| which are irrelevant to |config|.
    """
    untriaged_changes = set(changes)
    irrelevant_changes = set()

    # Changes that modify projects used in building are always relevant.
    untriaged_changes -= cls.GetChangesToBuildTools(changes, manifest)

    # Handles overlay changes.
    # ClassifyOverlayChanges only handles overlays visible to this
    # build. For example, an external build may not be able to view
    # the internal overlays. However, in that case, the internal changes
    # have already been filtered out in CommitQueueSyncStage, and are
    # not included in |changes|.
    overlay_changes, irrelevant_overlay_changes = cls.ClassifyOverlayChanges(
        untriaged_changes, config, build_root, manifest)
    untriaged_changes -= overlay_changes
    irrelevant_changes |= irrelevant_overlay_changes

    # Handles workon package changes.
    if packages_under_test is not None:
      workon_changes, irrelevant_workon_changes = cls.ClassifyWorkOnChanges(
          untriaged_changes, config, build_root, manifest,
          packages_under_test)
      untriaged_changes -= workon_changes
      irrelevant_changes |= irrelevant_workon_changes

    return irrelevant_changes


class CalculateSuspects(object):
  """Diagnose the cause for a given set of failures."""

  @classmethod
  def GetBlamedChanges(cls, changes):
    """Returns the changes that have been manually blamed.

    Args:
      changes: List of GerritPatch changes.

    Returns:
      A list of |changes| that were marked verified: -1 or code-review: -2,
      i.e. that carry any approval listed in
      constants.DEFAULT_CQ_SHOULD_REJECT_FIELDS.
    """
    # items() rather than the Python-2-only iteritems() so this works on
    # either major Python version.
    reject_fields = constants.DEFAULT_CQ_SHOULD_REJECT_FIELDS.items()
    return [x for x in changes
            if any(x.HasApproval(f, v) for f, v in reject_fields)]

  @classmethod
  def _FindPackageBuildFailureSuspects(cls, changes, messages):
    """Figure out what CLs are at fault for a set of build failures.

    Args:
      changes: A list of cros_patch.GerritPatch instances to consider.
      messages: A list of build failure messages, of type
        BuildFailureMessage.

    Returns:
      The set of suspects, i.e. the union of the suspects reported by each
      message.
    """
    suspects = set()
    for message in messages:
      suspects.update(message.FindPackageBuildFailureSuspects(changes))

    return suspects

  @classmethod
  def FilterChromiteChanges(cls, changes):
    """Returns a list of chromite changes in |changes|."""
    return [x for x in changes if x.project == constants.CHROMITE_PROJECT]

  @classmethod
  def _MatchesFailureType(cls, messages, fail_type, strict=True):
    """Returns True if all failures are instances of |fail_type|.

    Args:
      messages: A list of BuildFailureMessage or NoneType objects
        from the failed slaves.
      fail_type: The exception class to look for.
      strict: If False, treat NoneType message as a match.

    Returns:
      True if all non-None objects in |messages| are failures of
      |fail_type| and, when |strict| is set, no object in |messages| is
      None.
    """
    return ((not strict or all(messages)) and
            all(x.MatchesFailureType(fail_type) for x in messages if x))

  @classmethod
  def OnlyLabFailures(cls, messages, no_stat):
    """Determine if the cause of build failure was lab failure.

    Args:
      messages: A list of BuildFailureMessage or NoneType objects
        from the failed slaves.
      no_stat: A list of builders which failed prematurely without reporting
        status.

    Returns:
      True if the build failed purely due to lab failures.
    """
    # If any builder failed prematurely, lab failure was not the only cause.
    return (not no_stat and
            cls._MatchesFailureType(messages, failures_lib.TestLabFailure))

  @classmethod
  def OnlyInfraFailures(cls, messages, no_stat):
    """Determine if the cause of build failure was infrastructure failure.

    Args:
      messages: A list of BuildFailureMessage or NoneType objects
        from the failed slaves.
      no_stat: A list of builders which failed prematurely without reporting
        status.

    Returns:
      True if the build failed purely due to infrastructure failures.
    """
    # "Failed to report status" and "NoneType" messages are considered
    # infra failures.
    # bool() keeps the documented True/False contract: without it the bare
    # |no_stat| list would be returned when |messages| is empty.
    return bool(
        (not messages and no_stat) or
        cls._MatchesFailureType(
            messages, failures_lib.InfrastructureFailure, strict=False))

  @classmethod
  def FindSuspects(cls, changes, messages, infra_fail=False, lab_fail=False):
    """Find out what changes probably caused our failure.

    The first matching rule wins:
      1. If any changes have been manually blamed (see GetBlamedChanges),
         only those changes are suspects.
      2. If the build failed purely due to lab failures, nothing is suspect.
      3. If the build failed purely due to (non-lab) infrastructure
         failures, only chromite changes are suspects.
      4. If every message is a package build failure, defer to
         _FindPackageBuildFailureSuspects.
      5. Otherwise, every change is a suspect.

    Args:
      changes: A list of cros_patch.GerritPatch instances to consider.
      messages: A list of build failure messages, of type
        BuildFailureMessage or of type NoneType.
      infra_fail: The build failed purely due to infrastructure failures.
      lab_fail: The build failed purely due to test lab infrastructure
        failures.

    Returns:
      A set of changes as suspects.
    """
    bad_changes = cls.GetBlamedChanges(changes)
    if bad_changes:
      # If there are changes that have been set verified=-1 or
      # code-review=-2, these changes are the ONLY suspects of the
      # failed build.
      logging.warning('Detected that some changes have been blamed for '
                      'the build failure. Only these CLs will be rejected: %s',
                      cros_patch.GetChangesAsString(bad_changes))
      return set(bad_changes)
    elif lab_fail:
      logging.warning('Detected that the build failed purely due to HW '
                      'Test Lab failure(s). Will not reject any changes')
      return set()
    elif infra_fail:
      # |lab_fail| is known to be false here. The non-lab infrastructure
      # errors might have been caused by chromite changes.
      logging.warning(
          'Detected that the build failed due to non-lab infrastructure '
          'issue(s). Will only reject chromite changes')
      return set(cls.FilterChromiteChanges(changes))

    if all(message and message.IsPackageBuildFailure()
           for message in messages):
      # If we are here, there are no None messages.
      suspects = cls._FindPackageBuildFailureSuspects(changes, messages)
    else:
      suspects = set(changes)

    return suspects

  @classmethod
  def GetResponsibleOverlays(cls, build_root, messages):
    """Get the set of overlays that could have caused failures.

    This loops through the set of builders that failed in a given run and
    finds what overlays could have been responsible for the failure.

    Args:
      build_root: Build root directory.
      messages: A list of build failure messages from supporting builders.
        These must be BuildFailureMessage objects or NoneType objects.

    Returns:
      The set of overlays that could have caused the failures. If we can't
      determine what overlays are responsible (a message is None, or its
      builder has no known config), returns None.
    """
    responsible_overlays = set()
    for message in messages:
      if message is None:
        return None

      bot_id = message.builder
      config = cbuildbot_config.config.get(bot_id)
      if not config:
        return None

      responsible_overlays.update(
          GetRelevantOverlaysForConfig(config, build_root))

    return responsible_overlays

  @classmethod
  def FilterOutInnocentChanges(cls, build_root, changes, messages):
    """Filter out innocent changes based on failure messages.

    Args:
      build_root: Build root directory.
      changes: GitRepoPatches that might be guilty.
      messages: A list of build failure messages from supporting builders.
        These must be BuildFailureMessage objects or NoneType objects.

    Returns:
      A list of the changes that we could not prove innocent.
    """
    # If there were no internal failures, only kick out external changes.
    # (Still, fail all changes if we received any None messages.)
    candidates = changes
    if all(messages) and not any(message.internal for message in messages):
      candidates = [change for change in changes if not change.internal]

    return cls.FilterOutInnocentOverlayChanges(build_root, candidates,
                                               messages)

  @classmethod
  def FilterOutInnocentOverlayChanges(cls, build_root, changes, messages):
    """Filter out innocent overlay changes based on failure messages.

    It is not possible to break a x86-generic builder via a change to an
    unrelated overlay (e.g. amd64-generic). Filter out changes that are
    known to be innocent.

    Args:
      build_root: Build root directory.
      changes: GitRepoPatches that might be guilty.
      messages: A list of build failure messages from supporting builders.
        These must be BuildFailureMessage objects or NoneType objects.

    Returns:
      A list of the changes that we could not prove innocent.
    """
    all_overlays = set(portage_util.FindOverlays(
        constants.BOTH_OVERLAYS, None, build_root))
    responsible_overlays = cls.GetResponsibleOverlays(build_root, messages)
    if responsible_overlays is None:
      # We cannot tell which overlays are responsible; keep every change.
      return changes

    manifest = git.ManifestCheckout.Cached(build_root)
    candidates = []
    for change in changes:
      overlays = GetAffectedOverlays(change, manifest, all_overlays)
      # A change can only be proven innocent when every overlay it touched
      # is unrelated to the failed builders. It stays a suspect when it
      # touched something other than an overlay (None), when no affected
      # overlay could be determined (empty set), or when at least one of
      # the overlays it touched is responsible. A subset test here would
      # wrongly clear a change that touched a responsible overlay together
      # with an unrelated one.
      if not overlays or not overlays.isdisjoint(responsible_overlays):
        candidates.append(change)

    return candidates