diff options
author | Don Garrett <dgarrett@google.com> | 2014-08-12 14:56:01 -0700 |
---|---|---|
committer | chrome-internal-fetch <chrome-internal-fetch@google.com> | 2014-08-16 05:22:15 +0000 |
commit | c2fd63c7a3c315fe4a4eae17222085ac5f84ca04 (patch) | |
tree | 0809168b035828234ccdfde212ce14fc985457c4 /licensing | |
parent | 07ac932c5dbaaa0fc1de3db186669a5bf805ccd6 (diff) | |
download | chromite-c2fd63c7a3c315fe4a4eae17222085ac5f84ca04.tar.gz |
licensing: Split licenses.py -> licenses_lib.py and licenses.py.
Turn license into a stand alone script that uses a helper library to do
it's work. Split up it's 'main' method into distinct stages.
Also, fix nit's in hook script from a previous CL.
BUG=None
TEST=cros lint + manual testing.
CQ-DEPEND=CL:212124
Change-Id: Ic81d50260a6b7603904aabc38b3877ad59abb2b7
Reviewed-on: https://chromium-review.googlesource.com/212137
Tested-by: Don Garrett <dgarrett@chromium.org>
Reviewed-by: Don Garrett <dgarrett@chromium.org>
Commit-Queue: Don Garrett <dgarrett@chromium.org>
Diffstat (limited to 'licensing')
-rw-r--r-- | licensing/ebuild_license_hook.py | 15 | ||||
-rw-r--r-- | licensing/licenses.py | 1333 | ||||
-rw-r--r-- | licensing/licenses_lib.py | 1241 |
3 files changed, 1296 insertions, 1293 deletions
diff --git a/licensing/ebuild_license_hook.py b/licensing/ebuild_license_hook.py index 41e33d602..07fb7c12b 100644 --- a/licensing/ebuild_license_hook.py +++ b/licensing/ebuild_license_hook.py @@ -1,9 +1,8 @@ #!/usr/bin/python -# -# Copyright (c) 2014 The Chromium OS Authors. All rights reserved. +# Copyright 2014 The Chromium OS Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. -# + """Emerge hook to pre-parse and verify license information. Called from src/scripts/hooks/install/gen-package-licenses.sh as part of a @@ -12,16 +11,16 @@ package emerge. from chromite.lib import commandline -from chromite.licensing import licenses +from chromite.licensing import licenses_lib def main(args): parser = commandline.ArgumentParser(usage=__doc__) - parser.add_argument("--builddir", type="path", dest="builddir", - help="Take $PORTAGE_BUILDDIR as argument.") + parser.add_argument('--builddir', type='path', dest='builddir', + help='Take $PORTAGE_BUILDDIR as argument.') opts = parser.parse_args(args) + opts.Freeze() - - licensing = licenses.Licensing(None, None, True) + licensing = licenses_lib.Licensing(None, None, True) licensing.HookPackageProcess(opts.builddir) diff --git a/licensing/licenses.py b/licensing/licenses.py index a9e548cec..cf279780a 100644 --- a/licensing/licenses.py +++ b/licensing/licenses.py @@ -120,1282 +120,55 @@ the file in /opt/google/chrome/resources/about_os_credits.html or as defined in http://crbug.com/271832 . """ -import cgi -import codecs + import logging import os -import re -import tempfile -from chromite.cbuildbot import constants -from chromite.cbuildbot import portage_utilities from chromite.lib import commandline from chromite.lib import cros_build_lib -from chromite.lib import osutils - -# We are imported by src/repohooks/pre-upload.py in a non chroot environment -# where yaml may not be there, so we don't error on that since it's not needed -# in that case. -try: - import yaml -except ImportError: - yaml = None - -debug = False - -# See http://crbug.com/207004 for discussion. -PER_PKG_LICENSE_DIR = '/var/db/pkg' - -STOCK_LICENSE_DIRS = [ - os.path.join(constants.SOURCE_ROOT, - 'src/third_party/portage-stable/licenses'), -] - -# There are licenses for custom software we got and isn't part of -# upstream gentoo. -CUSTOM_LICENSE_DIRS = [ - os.path.join(constants.SOURCE_ROOT, - 'src/third_party/chromiumos-overlay/licenses'), -] - -COPYRIGHT_ATTRIBUTION_DIR = ( - os.path.join( - constants.SOURCE_ROOT, - 'src/third_party/chromiumos-overlay/licenses/copyright-attribution')) - -# Virtual packages don't need to have a license and often don't, so we skip them -# chromeos-base contains google platform packages that are covered by the -# general license at top of tree, so we skip those too. -SKIPPED_CATEGORIES = [ - 'virtual', -] - -SKIPPED_PACKAGES = [ - # Fix these packages by adding a real license in the code. - # You should not skip packages just because the license scraping doesn't - # work. Stick those special cases into PACKAGE_LICENSES. - # Packages should only be here because they are sub/split packages already - # covered by the license of the main package. - - # These are Chrome-OS-specific packages, copyright BSD-Google - 'sys-kernel/chromeos-kernel', # already manually credit Linux -] - -SKIPPED_LICENSES = [ - # Some of our packages contain binary blobs for which we have special - # negotiated licenses, and no need to display anything publicly. Strongly - # consider using Google-TOS instead, if possible. - 'Proprietary-Binary', - - # If you have an early repo for which license terms have yet to be decided - # use this. It will cause licensing for the package to be mostly ignored. - # Official should error for any package with this license. - 'TAINTED', # TODO(dgarrett): Error on official builds with this license. -] - -LICENSE_NAMES_REGEX = [ - r'^copyright$', - r'^copyright[.]txt$', - r'^copyright[.]regex$', # llvm - r'^copying.*$', - r'^licen[cs]e.*$', - r'^licensing.*$', # libatomic_ops - r'^ipa_font_license_agreement_v1[.]0[.]txt$', # ja-ipafonts - r'^PKG-INFO$', # copyright assignment for - # some python packages - # (netifaces, unittest2) -] - -# These are _temporary_ license mappings for packages that do not have a valid -# shared/custom license, or LICENSE file we can use. -# Once this script runs earlier (during the package build process), it will -# block new source without a LICENSE file if the ebuild contains a license -# that requires copyright assignment (BSD and friends). -# At that point, new packages will get fixed to include LICENSE instead of -# adding workaround mappings like those below. -# The way you now fix copyright attribution cases create a custom file with the -# right license directly in COPYRIGHT_ATTRIBUTION_DIR. -PACKAGE_LICENSES = { - # TODO: replace the naive license parsing code in this script with a hook - # into portage's license parsing. See http://crbug.com/348779 - - # Chrome (the browser) is complicated, it has a morphing license that is - # either BSD-Google, or BSD-Google,Google-TOS depending on how it was - # built. We bypass this problem for now by hardcoding the Google-TOS bit as - # per ChromeOS with non free bits - 'chromeos-base/chromeos-chrome': ['BSD-Google', 'Google-TOS'], - - # Currently the code cannot parse LGPL-3 || ( LGPL-2.1 MPL-1.1 ) - 'dev-python/pycairo': ['LGPL-3', 'LGPL-2.1'], -} - -# Any license listed list here found in the ebuild will make the code look for -# license files inside the package source code in order to get copyright -# attribution from them. -COPYRIGHT_ATTRIBUTION_LICENSES = [ - 'BSD', # requires distribution of copyright notice - 'BSD-2', # so does BSD-2 http://opensource.org/licenses/BSD-2-Clause - 'BSD-3', # and BSD-3? http://opensource.org/licenses/BSD-3-Clause - 'BSD-4', # and 4? - 'BSD-with-attribution', - 'MIT', - 'MIT-with-advertising', - 'Old-MIT', -] - -# The following licenses are not invalid or to show as a less helpful stock -# license, but it's better to look in the source code for a more specific -# license if there is one, but not an error if no better one is found. -# Note that you don't want to set just anything here since any license here -# will be included once in stock form and a second time in custom form if -# found (there is no good way to know that a license we found on disk is the -# better version of the stock version, so we show both). -LOOK_IN_SOURCE_LICENSES = [ - 'as-is', # The stock license is very vague, source always has more details. - 'PSF-2', # The custom license in python is more complete than the template. - - # As far as I know, we have no requirement to do copyright attribution for - # these licenses, but the license included in the code has slightly better - # information than the stock Gentoo one (including copyright attribution). - 'BZIP2', # Single use license, do copyright attribution. - 'OFL', # Almost single use license, do copyright attribution. - 'OFL-1.1', # Almost single use license, do copyright attribution. - 'UoI-NCSA', # Only used by NSCA, might as well show their custom copyright. -] - -# This used to provide overrides. I can't find a valid reason to add any more -# here, though. -PACKAGE_HOMEPAGES = { - # Example: - # 'x11-proto/glproto': ['http://www.x.org/'], -} - -# These are tokens found in LICENSE= in an ebuild that aren't licenses we -# can actually read from disk. -# You should not use this to blacklist real licenses. -LICENCES_IGNORE = [ - ')', # Ignore OR tokens from LICENSE="|| ( LGPL-2.1 MPL-1.1 )" - '(', - '||', -] - -# Find the directory of this script. -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) - -# The template files we depend on for generating HTML. -TMPL = os.path.join(SCRIPT_DIR, 'about_credits.tmpl') -ENTRY_TMPL = os.path.join(SCRIPT_DIR, 'about_credits_entry.tmpl') -SHARED_LICENSE_TMPL = os.path.join( - SCRIPT_DIR, 'about_credits_shared_license_entry.tmpl') - - -# This is called directly by src/repohooks/pre-upload.py -def GetLicenseTypesFromEbuild(ebuild_path): - """Returns a list of license types from the ebuild file. - - This function does not always return the correct list, but it is - faster than using portageq for not having to access chroot. It is - intended to be used for tasks such as presubmission checks. - - Args: - ebuild_path: ebuild to read. - - Returns: - list of licenses read from ebuild. - - Raises: - ValueError: ebuild errors. - """ - ebuild_env_tmpl = """ -has() { [[ " ${*:2} " == *" $1 "* ]]; } -inherit() { - local overlay_list="%(overlay_list)s" - local eclass overlay f - for eclass; do - has ${eclass} ${_INHERITED_} && continue - _INHERITED_+=" ${eclass}" - for overlay in %(overlay_list)s; do - f="${overlay}/eclass/${eclass}.eclass" - if [[ -e ${f} ]]; then - source "${f}" - break - fi - done - done -} -source %(ebuild)s""" - - # TODO: the overlay_list hard-coded here should be changed to look - # at the current overlay, and then the master overlays. E.g. for an - # ebuild file in overlay-parrot, we will look at parrot overlay - # first, and then look at portage-stable and chromiumos, which are - # listed as masters in overlay-parrot/metadata/layout.conf. - tmpl_env = { - 'ebuild': ebuild_path, - 'overlay_list': '%s %s' % ( - os.path.join(constants.SOURCE_ROOT, - 'src/third_party/chromiumos-overlay'), - os.path.join(constants.SOURCE_ROOT, - 'src/third_party/portage-stable')) - } - - with tempfile.NamedTemporaryFile(bufsize=0) as f: - osutils.WriteFile(f.name, ebuild_env_tmpl % tmpl_env) - env = osutils.SourceEnvironment( - f.name, whitelist=['LICENSE'], ifs=' ', multiline=True) - - if not env.get('LICENSE'): - raise ValueError('No LICENSE found in the ebuild.') - if re.search(r'[,;]', env['LICENSE']): - raise ValueError( - 'LICENSE field in the ebuild should be whitespace-limited.') - - return env['LICENSE'].split() - - -class PackageLicenseError(Exception): - """Thrown if something fails while getting license information for a package. - - This will cause the processing to error in the end. - """ - - -class PackageInfo(object): - """Package info containers, mostly for storing licenses.""" - - def __init__(self): - - self.board = None - self.revision = None - - # Array of scanned license texts. - self.license_text_scanned = [] - - self.category = None - self.name = None - self.version = None - - # Looks something like this - # /mnt/host/source/src/ - # third_party/portage-stable/net-misc/rsync/rsync-3.0.8.ebuild - self.ebuild_path = None - - # Array of license names retrieved from ebuild or override in this code. - self.ebuild_license_names = [] - self.homepages = [] - # This contains licenses names we can read from Gentoo or custom licenses. - # These are supposed to be shared licenses (i.e. licenses referenced by - # more then one package), but after all processing, we may find out that - # some are only used once and they get taken out of the shared pool and - # pasted directly in the sole package that was using them (see - # GenerateHTMLLicenseOutput). - self.license_names = set() - - # We set this if the ebuild has a BSD/MIT like license that requires - # scanning for a LICENSE file in the source code, or a static mapping - # in PACKAGE_LICENSES. Not finding one once this is set, is fatal. - self.need_copyright_attribution = False - # This flag just says we'd like to include licenses from the source, but - # not finding any is not fatal. - self.scan_source_for_licenses = False - - # After reading basic package information, we can mark the package as - # one to skip in licensing. - self.skip = False - - # If we failed to get licensing for this package, mark it as such so that - # it can be flagged when the full license file is being generated. - self.licensing_failed = False - - # If we are called from a hook, we grab package info from the soure tree. - # This is also used as a flag to know whether we should do package work - # based on an installed package, or one that is being built and we got - # called from the hook. - self.build_source_tree = None - - @property - def fullnamerev(self): - s = '%s-%s' % (self.fullname, self.version) - if self.revision: - s += '-r%s' % self.revision - return s - - @property - def fullname(self): - return '%s/%s' % (self.category, self.name) - - @property - def license_dump_path(self): - """e.g. /build/x86-alex//var/db/pkg/sys-apps/dtc-1.4.0/license.yaml.""" - return "%s/%s/%s/license.yaml" % (cros_build_lib.GetSysroot(self.board), - PER_PKG_LICENSE_DIR, self.fullnamerev) - - def _BuildInfo(self, filename): - filename = '%s/build-info/%s' % (self.build_source_tree, filename) - # Buildinfo properties we read are in US-ASCII, not Unicode. - try: - bi = open(filename).read().rstrip() - # Some properties like HOMEPAGE may be absent. - except IOError: - bi = "" - return bi - - def _RunEbuildPhases(self, phases): - """Run a list of ebuild phases on an ebuild. - - Args: - phases: list of phases like ['clean', 'fetch'] or ['unpack']. - - Returns: - ebuild command output - """ - - return cros_build_lib.RunCommand( - ['ebuild-%s' % self.board, self.ebuild_path] + phases, print_cmd=debug, - redirect_stdout=True) - - def _GetOverrideLicense(self): - """Look in COPYRIGHT_ATTRIBUTION_DIR for license with copyright attribution. - - For dev-util/bsdiff-4.3-r5, the code will look for - dev-util/bsdiff-4.3-r5 - dev-util/bsdiff-4.3 - dev-util/bsdiff - - It is ok to have more than one bsdiff license file, and an empty file acts - as a rubout (i.e. an empty dev-util/bsdiff-4.4 will shadow dev-util/bsdiff - and tell the licensing code to look in the package source for a license - instead of using dev-util/bsdiff as an override). - - Returns: - False (no license found) or a multiline license string. - """ - license_read = None - # dev-util/bsdiff-4.3-r5 -> bsdiff-4.3-r5 - filename = os.path.basename(self.fullnamerev) - license_path = os.path.join(COPYRIGHT_ATTRIBUTION_DIR, - os.path.dirname(self.fullnamerev)) - pv = portage_utilities.SplitPV(filename) - pv_no_rev = '%s-%s' % (pv.package, pv.version_no_rev) - for filename in (pv.pv, pv_no_rev, pv.package): - file_path = os.path.join(license_path, filename) - logging.debug("Looking for override copyright attribution license in %s", - file_path) - if os.path.exists(file_path): - # Turn - # /../merlin/trunk/src/third_party/chromiumos-overlay/../dev-util/bsdiff - # into - # chromiumos-overlay/../dev-util/bsdiff - short_dir_path = os.path.join(*file_path.rsplit(os.path.sep, 5)[1:]) - license_read = "Copyright Attribution License %s:\n\n" % short_dir_path - license_read += ReadUnknownEncodedFile( - file_path, "read copyright attribution license") - break - - return license_read - - def _ExtractLicenses(self): - """Scrounge for text licenses in the source of package we'll unpack. - - This is only called if we couldn't get usable licenses from the ebuild, - or one of them is BSD/MIT like which forces us to look for a file with - copyright attribution in the source code itself. - - First, we have a shortcut where we scan COPYRIGHT_ATTRIBUTION_DIR to see if - we find a license for this package. If so, we use that. - Typically it'll be used if the unpacked source does not have the license - that we're required to display for copyright attribution (in some cases it's - plain absent, in other cases, it could be in a filename we don't look for). - - Otherwise, we scan the unpacked source code for what looks like license - files as defined in LICENSE_NAMES_REGEX. - - Raises: - AssertionError: on runtime errors - PackageLicenseError: couldn't find copyright attribution file. - """ - license_override = self._GetOverrideLicense() - if license_override: - self.license_text_scanned = [license_override] - return - - if self.build_source_tree: - workdir = "%s/work" % self.build_source_tree - else: - self._RunEbuildPhases(['clean', 'fetch']) - output = self._RunEbuildPhases(['unpack']).output.splitlines() - # Output is spammy, it looks like this: - # * gc-7.2d.tar.gz RMD160 SHA1 SHA256 size ;-) ... [ ok ] - # * checking gc-7.2d.tar.gz ;-) ... [ ok ] - # * Running stacked hooks for pre_pkg_setup - # * sysroot_build_bin_dir ... - # [ ok ] - # * Running stacked hooks for pre_src_unpack - # * python_multilib_setup ... - # [ ok ] - # >>> Unpacking source... - # >>> Unpacking gc-7.2d.tar.gz to /build/x86-alex/tmp/po/[...]ps-7.2d/work - # >>> Source unpacked in /build/x86-alex/tmp/portage/[...]ops-7.2d/work - # So we only keep the last 2 lines, the others we don't care about. - output = [line for line in output if line[0:3] == ">>>" and - line != ">>> Unpacking source..."] - for line in output: - logging.info(line) - - args = ['portageq-%s' % self.board, 'envvar', 'PORTAGE_TMPDIR'] - result = cros_build_lib.RunCommand(args, print_cmd=debug, - redirect_stdout=True) - tmpdir = result.output.splitlines()[0] - # tmpdir gets something like /build/daisy/tmp/ - workdir = os.path.join(tmpdir, 'portage', self.fullnamerev, 'work') - - if not os.path.exists(workdir): - raise AssertionError("Unpack of %s didn't create %s. Version mismatch" % - (self.fullnamerev, workdir)) - - # You may wonder how deep should we go? - # In case of packages with sub-packages, it could be deep. - # Let's just be safe and get everything we can find. - # In the case of libatomic_ops, it's actually required to look deep - # to find the MIT license: - # dev-libs/libatomic_ops-7.2d/work/gc-7.2/libatomic_ops/doc/LICENSING.txt - args = ['find', workdir, '-type', 'f'] - result = cros_build_lib.RunCommand(args, print_cmd=debug, - redirect_stdout=True).output.splitlines() - # Truncate results to look like this: swig-2.0.4/COPYRIGHT - files = [x[len(workdir):].lstrip('/') for x in result] - license_files = [] - for name in files: - # When we scan a source tree managed by git, this can contain license - # files that are not part of the source. Exclude those. - # (e.g. .git/refs/heads/licensing) - if ".git/" in name: - continue - basename = os.path.basename(name) - # Looking for license.* brings up things like license.gpl, and we - # never want a GPL license when looking for copyright attribution, - # so we skip them here. We also skip regexes that can return - # license.py (seen in some code). - if re.search(r".*GPL.*", basename) or re.search(r"\.py$", basename): - continue - for regex in LICENSE_NAMES_REGEX: - if re.search(regex, basename, re.IGNORECASE): - license_files.append(name) - break - - if not license_files: - if self.need_copyright_attribution: - logging.error(""" -%s: unable to find usable license. -Typically this will happen because the ebuild says it's MIT or BSD, but there -was no license file that this script could find to include along with a -copyright attribution (required for BSD/MIT). - -If this is Google source, please change -LICENSE="BSD" -to -LICENSE="BSD-Google" - -If not, go investigate the unpacked source in %s, -and find which license to assign. Once you found it, you should copy that -license to a file under %s -(or you can modify LICENSE_NAMES_REGEX to pickup a license file that isn't -being scraped currently).""", - self.fullnamerev, workdir, COPYRIGHT_ATTRIBUTION_DIR) - raise PackageLicenseError() - else: - # We can get called for a license like as-is where it's preferable - # to find a better one in the source, but not fatal if we didn't. - logging.info("Was not able to find a better license for %s " - "in %s to replace the more generic one from ebuild", - self.fullnamerev, workdir) - - # Examples of multiple license matches: - # dev-lang/swig-2.0.4-r1: swig-2.0.4/COPYRIGHT swig-2.0.4/LICENSE - # dev-libs/glib-2.32.4-r1: glib-2.32.4/COPYING pkg-config-0.26/COPYING - # dev-libs/libnl-3.2.14: libnl-doc-3.2.14/COPYING libnl-3.2.14/COPYING - # dev-libs/libpcre-8.30-r2: pcre-8.30/LICENCE pcre-8.30/COPYING - # dev-libs/libusb-0.1.12-r6: libusb-0.1.12/COPYING libusb-0.1.12/LICENSE - # dev-libs/pyzy-0.1.0-r1: db/COPYING pyzy-0.1.0/COPYING - # net-misc/strongswan-5.0.2-r4: strongswan-5.0.2/COPYING - # strongswan-5.0.2/LICENSE - # sys-process/procps-3.2.8_p11: debian/copyright procps-3.2.8/COPYING - logging.info('License(s) for %s: %s', self.fullnamerev, - ' '.join(license_files)) - for license_file in sorted(license_files): - # Joy and pink ponies. Some license_files are encoded as latin1 while - # others are utf-8 and of course you can't know but only guess. - license_path = os.path.join(workdir, license_file) - license_txt = ReadUnknownEncodedFile(license_path, "Adding License") - - self.license_text_scanned += [ - "Scanned Source License %s:\n\n%s" % (license_file, license_txt)] - - # We used to clean up here, but there have been many instances where - # looking at unpacked source to see where the licenses were, was useful - # so let's disable this for now - # self._RunEbuildPhases(['clean']) - - def GetPackageInfo(self, fullnamewithrev): - """Populate PackageInfo with package license, and homepage. - - self.ebuild_license_names will not be filled if the package is skipped - or if there was an issue getting data from the ebuild. - self.license_names will only get the licenses that we can paste - as shared licenses. - scan_source_for_licenses will be set if we should unpack the source to look - for licenses - if need_copyright_attribution is also set, not finding a license in the - source is fatal (PackageLicenseError will get raised). - - Args: - fullnamewithrev: e.g. dev-libs/libatomic_ops-7.2d - - Raises: - AssertionError: on runtime errors - """ - if not fullnamewithrev: - if not self.build_source_tree: - raise AssertionError("Cannot continue without full name or source tree") - fullnamewithrev = "%s/%s" % (self._BuildInfo("CATEGORY"), - self._BuildInfo("PF")) - logging.debug("Computed package name %s from %s", fullnamewithrev, - self.build_source_tree) - - try: - cpv = portage_utilities.SplitCPV(fullnamewithrev) - # A bad package can either raise a TypeError exception or return None, - # so we catch both cases. - if not cpv: - raise TypeError - except TypeError: - raise AssertionError("portage couldn't find %s, missing version number?" % - fullnamewithrev) - - self.category, self.name, self.version, self.revision = ( - cpv.category, cpv.package, cpv.version_no_rev, cpv.rev) - - if self.revision is not None: - self.revision = str(self.revision).lstrip('r') - if self.revision == '0': - self.revision = None - - if self.category in SKIPPED_CATEGORIES: - logging.info("%s in SKIPPED_CATEGORIES, skip package", self.fullname) - self.skip = True - return - - if self.fullname in SKIPPED_PACKAGES: - logging.info("%s in SKIPPED_PACKAGES, skip package", self.fullname) - self.skip = True - return - - def _FindEbuildPath(self): - """Populate package info from an ebuild retrieved via equery.""" - # By default, equery returns the latest version of the package. A - # build may have used an older version than what is currently - # available in the source tree (a build dependency can be pinned - # to an older version of a package for compatibility - # reasons). Therefore we need to tell equery that we want the - # exact version number used in the image build as opposed to the - # latest available in the source tree. - args = ['equery-%s' % self.board, '-q', '-C', 'which', self.fullnamerev] - try: - path = cros_build_lib.RunCommand(args, print_cmd=True, - redirect_stdout=True).output.strip() - if not path: - raise AssertionError - except: - raise AssertionError('GetEbuildPath for %s failed.\n' - 'Is your tree clean? Delete %s and rebuild' % - (self.name, - cros_build_lib.GetSysroot(board=self.board))) - logging.debug("%s -> %s", " ".join(args), path) - - if not os.access(path, os.F_OK): - raise AssertionError("Can't access %s", path) - - self.ebuild_path = path - - def _ReadEbuildMetadata(self): - """Read package metadata retrieved via portageq.""" - args = ['portageq-%s' % self.board, 'metadata', - cros_build_lib.GetSysroot(board=self.board), 'ebuild', - self.fullnamerev, 'HOMEPAGE', 'LICENSE'] - tmp = cros_build_lib.RunCommand(args, print_cmd=debug, - redirect_stdout=True) - lines = tmp.output.splitlines() - # Runs: - # portageq metadata /build/x86-alex ebuild net-misc/wget-1.12-r2 \ - # HOMEPAGE LICENSE - # Returns: - # http://www.gnu.org/software/wget/ - # GPL-3 - self.homepages, self.ebuild_license_names = ( - lines[0].split(), lines[1].split()) - - def _TestEbuildContents(self): - """Discover if the ebuild installed any files. - - Returns: - bool which tells if any files were installed. - """ - # Search for anything the ebuild might install, other than a directory. - args = ['equery-%s' % self.board, '-q', '-C', 'files', self.fullnamerev, - '-f', 'obj'] - tmp = cros_build_lib.RunCommand(args, print_cmd=debug, redirect_stdout=True) - lines = tmp.output.splitlines() - - # lines is an array of the file names installed by the ebuild. - return bool(lines) - - def GetLicenses(self): - """Get licenses from the ebuild field and the unpacked source code. - - Some packages have static license mappings applied to them that get - retrieved from the ebuild. - - For others, we figure out whether the package source should be scanned to - add licenses found there. - - Raises: - AssertionError: on runtime errors - PackageLicenseError: couldn't find license in ebuild and source. - """ - if self.build_source_tree: - # If the total size installed is zero, we installed no content to license. - if self._BuildInfo("SIZE").strip() == '0': - self.skip = True - return - self.homepages = self._BuildInfo("HOMEPAGE").split() - self.ebuild_license_names = self._BuildInfo("LICENSE").split() - else: - self._FindEbuildPath() - self._ReadEbuildMetadata() - self.skip = self.skip or not self._TestEbuildContents() - - # If this ebuild only uses skipped licenses, skip it. - if (self.ebuild_license_names and - all(l in SKIPPED_LICENSES for l in self.ebuild_license_names)): - self.skip = True - - if self.skip: - return - - if self.fullname in PACKAGE_HOMEPAGES: - self.homepages = PACKAGE_HOMEPAGES[self.fullname] - - # Packages with missing licenses or licenses that need mapping (like - # BSD/MIT) are hardcoded here: - if self.fullname in PACKAGE_LICENSES: - self.ebuild_license_names = PACKAGE_LICENSES[self.fullname] - logging.info("Static license mapping for %s: %s", self.fullnamerev, - ",".join(self.ebuild_license_names)) - else: - logging.info("Read licenses for %s: %s", self.fullnamerev, - ",".join(self.ebuild_license_names)) - # Lots of packages in chromeos-base have their license set to BSD instead - # of BSD-Google: - new_license_names = [] - for license_name in self.ebuild_license_names: - # TODO: temp workaround for http;//crbug.com/348750 , remove when the bug - # is fixed. - if (license_name == "BSD" and - self.fullnamerev.startswith("chromeos-base/")): - license_name = "BSD-Google" - logging.error( - "Fixed BSD->BSD-Google for %s because it's in chromeos-base. " - "Please fix the LICENSE field in the ebuild", self.fullnamerev) - # TODO: temp workaround for http;//crbug.com/348749 , remove when the bug - # is fixed. - if license_name == "Proprietary": - license_name = "Google-TOS" - logging.error( - "Fixed Proprietary -> Google-TOS for %s. " - "Please fix the LICENSE field in the ebuild", self.fullnamerev) - new_license_names.append(license_name) - self.ebuild_license_names = new_license_names +from chromite.licensing import licenses_lib - # The ebuild license field can look like: - # LICENSE="GPL-3 LGPL-3 Apache-2.0" (this means AND, as in all 3) - # for third_party/portage-stable/app-admin/rsyslog/rsyslog-5.8.11.ebuild - # LICENSE="|| ( LGPL-2.1 MPL-1.1 )" - # for third_party/portage-stable/x11-libs/cairo/cairo-1.8.8.ebuild - - # The parser isn't very smart and only has basic support for the - # || ( X Y ) OR logic to do the following: - # In order to save time needlessly unpacking packages and looking or a - # cleartext license (which is really a crapshoot), if we have a license - # like BSD that requires looking for copyright attribution, but we can - # chose another license like GPL, we do that. - - if not self.skip and not self.ebuild_license_names: - logging.error("%s: no license found in ebuild. FIXME!", self.fullnamerev) - # In a bind, you could comment this out. I'm making the output fail to - # get your attention since this error really should be fixed, but if you - # comment out the next line, the script will try to find a license inside - # the source. - raise PackageLicenseError() - - # This is not invalid, but the parser can't deal with it, so if it ever - # happens, error out to tell the programmer to do something. - # dev-python/pycairo-1.10.0-r4: LGPL-3 || ( LGPL-2.1 MPL-1.1 ) - if "||" in self.ebuild_license_names[1:]: - logging.error("%s: Can't parse || in the middle of a license: %s", - self.fullnamerev, ' '.join(self.ebuild_license_names)) - raise PackageLicenseError() - - or_licenses_and_one_is_no_attribution = False - # We do a quick early pass first so that the longer pass below can - # run accordingly. - for license_name in [x for x in self.ebuild_license_names - if x not in LICENCES_IGNORE]: - # Here we have an OR case, and one license that we can use stock, so - # we remember that in order to be able to skip license attributions if - # any were in the OR. - if (self.ebuild_license_names[0] == "||" and - license_name not in COPYRIGHT_ATTRIBUTION_LICENSES): - or_licenses_and_one_is_no_attribution = True - - for license_name in [x for x in self.ebuild_license_names - if x not in LICENCES_IGNORE]: - # Licenses like BSD or MIT can't be used as is because they do not contain - # copyright self. They have to be replaced by copyright file given in the - # source code, or manually mapped by us in PACKAGE_LICENSES - if license_name in COPYRIGHT_ATTRIBUTION_LICENSES: - # To limit needless efforts, if a package is BSD or GPL, we ignore BSD - # and use GPL to avoid scanning the package, but we can only do this if - # or_licenses_and_one_is_no_attribution has been set above. - # This ensures that if we have License: || (BSD3 BSD4), we will - # look in the source. - if or_licenses_and_one_is_no_attribution: - logging.info("%s: ignore license %s because ebuild LICENSES had %s", - self.fullnamerev, license_name, - ' '.join(self.ebuild_license_names)) - else: - logging.info("%s: can't use %s, will scan source code for copyright", - self.fullnamerev, license_name) - self.need_copyright_attribution = True - self.scan_source_for_licenses = True - else: - self.license_names.add(license_name) - # We can't display just 2+ because it only contains text that says to - # read v2 or v3. - if license_name == 'GPL-2+': - self.license_names.add('GPL-2') - if license_name == 'LGPL-2+': - self.license_names.add('LGPL-2') - - if license_name in LOOK_IN_SOURCE_LICENSES: - logging.info("%s: Got %s, will try to find better license in source...", - self.fullnamerev, license_name) - self.scan_source_for_licenses = True - - if self.license_names: - logging.info('%s: using stock|cust license(s) %s', - self.fullnamerev, ','.join(self.license_names)) - - # If the license(s) could not be found, or one requires copyright - # attribution, dig in the source code for license files: - # For instance: - # Read licenses from ebuild for net-dialup/ppp-2.4.5-r3: BSD,GPL-2 - # We need get the substitution file for BSD and add it to GPL. - if self.scan_source_for_licenses: - self._ExtractLicenses() - - # This shouldn't run, but leaving as sanity check. - if not self.license_names and not self.license_text_scanned: - raise AssertionError("Didn't find usable licenses for %s" % - self.fullnamerev) - - -class Licensing(object): - """Do the actual work of extracting licensing info and outputting html.""" - - def __init__(self, board, package_fullnames, gen_licenses): - # eg x86-alex - self.board = board - # List of stock and custom licenses referenced in ebuilds. Used to - # print a report. Dict value says which packages use that license. - self.licenses = {} - - # Licenses are supposed to be generated at package build time and be - # ready for us, but in case they're not, they can be generated. - self.gen_licenses = gen_licenses - - # This keeps track of whether we have an incomplete license file due to - # package errors during parsing. - # Any non empty list at the end shows the list of packages that caused - # errors. - self.incomplete_packages = [] - - self.package_text = {} - self.entry_template = None - - # We need to have a dict for the list of packages objects, index by package - # fullnamerev, so that when we scan our licenses at the end, and find out - # some shared licenses are only used by one package, we can access that - # package object by name, and add the license directly in that object. - self.packages = {} - self._package_fullnames = package_fullnames - - @property - def sorted_licenses(self): - return sorted(self.licenses.keys(), key=str.lower) - - def _SaveLicenseDump(self, pkg): - if pkg.build_source_tree: - save_file = "%s/build-info/license.yaml" % pkg.build_source_tree - else: - save_file = pkg.license_dump_path - logging.debug("Saving license to %s", save_file) - save_dir = os.path.dirname(save_file) - if not os.path.isdir(save_dir): - os.makedirs(save_dir, 0755) - with open(save_file, "w") as f: - yaml_dump = [] - for key, value in pkg.__dict__.items(): - yaml_dump.append([key, value]) - f.write(yaml.dump(yaml_dump)) - - def _LoadLicenseDump(self, pkg): - save_file = pkg.license_dump_path - logging.debug("Getting license from %s for %s", save_file, pkg.name) - with open(save_file, "r") as f: - # yaml.safe_load barfs on unicode it output, but we don't really need it. - yaml_dump = yaml.load(f) - for key, value in yaml_dump: - pkg.__dict__[key] = value - - def LicensedPackages(self, license_name): - """Return list of packages using a given license.""" - return self.licenses[license_name] - - def LoadPackageInfo(self, board): - """Populate basic package info for all packages from their ebuild.""" - for package_name in self._package_fullnames: - pkg = PackageInfo() - pkg.board = board - pkg.GetPackageInfo(package_name) - self.packages[package_name] = pkg - - def HookPackageProcess(self, pkg_build_path): - """Different entry point to populate a packageinfo. - - This is called instead of LoadPackageInfo when called by a package build. - - Args: - pkg_build_path: unpacked being built by emerge. - """ - pkg = PackageInfo() - pkg.build_source_tree = pkg_build_path - pkg.GetPackageInfo(None) - if not pkg.skip: - pkg.GetLicenses() - self._SaveLicenseDump(pkg) - - def ProcessPackageLicenses(self): - """Iterate through all packages provided and gather their licenses. - - GetLicenses will scrape licenses from the code and/or gather stock license - names. We gather the list of stock and custom ones for later processing. - - Do not call this after adding virtual packages with AddExtraPkg. - """ - for package_name in self.packages: - pkg = self.packages[package_name] - if pkg.skip: - if self.gen_licenses: - logging.info("Package %s is in skip list", package_name) - else: - # If we do a licensing run expecting to get licensing objects from - # an image build, virtual packages will be missing such objects - # because virtual packages do not get the install hook run at build - # time. Because this script may not have permissions to write in the - # /var/db/ directory, we don't want it to generate useless license - # bits for virtual packages. As a result, ignore virtual packages - # here. - if pkg.category == "virtual": - logging.debug("Ignoring %s virtual package", package_name) - continue - - # Other skipped packages get dumped with incomplete info and the skip flag - if not os.path.exists(pkg.license_dump_path) and not self.gen_licenses: - logging.warning(">>> License for %s is missing, creating now <<<", - package_name) - if not os.path.exists(pkg.license_dump_path) or self.gen_licenses: - if not pkg.skip: - try: - pkg.GetLicenses() - except PackageLicenseError: - pkg.licensing_failed = True - # We dump packages where licensing failed too. - self._SaveLicenseDump(pkg) - - # To debug the code, we force the data to be re-read from the dumps - # instead of reusing what we may have in memory. - for package_name in self.packages: - pkg = self.packages[package_name] - if pkg.category == "virtual": - continue - - self._LoadLicenseDump(pkg) - logging.debug("loaded dump for %s", pkg.fullnamerev) - if pkg.skip: - logging.info("Package %s is in skip list", pkg.fullnamerev) - if pkg.licensing_failed: - logging.info("Package %s failed licensing", pkg.fullnamerev) - self.incomplete_packages += [pkg.fullnamerev] - - def AddExtraPkg(self, pkg_data): - """Allow adding pre-created virtual packages. - - GetLicenses will not work on them, so add them after having run - ProcessPackages. - - Args: - pkg_data: array of package data as defined below - """ - pkg = PackageInfo() - pkg.board = self.board - pkg.category = pkg_data[0] - pkg.name = pkg_data[1] - pkg.version = pkg_data[2] - pkg.homepages = pkg_data[3] # this is a list - pkg.license_names = pkg_data[4] # this is also a list - pkg.ebuild_license_names = pkg_data[4] - self.packages[pkg.fullnamerev] = pkg - - # Called directly by src/repohooks/pre-upload.py - @staticmethod - def FindLicenseType(license_name): - """Says if a license is stock Gentoo, custom, or doesn't exist.""" - - for directory in STOCK_LICENSE_DIRS: - path = '%s/%s' % (directory, license_name) - if os.path.exists(path): - return "Gentoo Package Stock" - - for directory in CUSTOM_LICENSE_DIRS: - path = '%s/%s' % (directory, license_name) - if os.path.exists(path): - return "Custom" - - if license_name in SKIPPED_LICENSES: - return "Custom" - - raise AssertionError(""" -license %s could not be found in %s -If the license in the ebuild is correct, -a) a stock license should be added to portage-stable/licenses : -running `cros_portage_upgrade` inside of the chroot should clone this repo -to /tmp/portage/: -https://chromium.googlesource.com/chromiumos/overlays/portage/+/gentoo -find the new licenses under licenses, and add them to portage-stable/licenses - -b) if it's a non gentoo package with a custom license, you can copy that license -to third_party/chromiumos-overlay/licenses/ - -Try re-running the script with -p cat/package-ver --generate -after fixing the license.""" % - (license_name, - '\n'.join(STOCK_LICENSE_DIRS + CUSTOM_LICENSE_DIRS)) - ) - - @staticmethod - def ReadSharedLicense(license_name): - """Read and return stock or cust license file specified in an ebuild.""" - - license_path = None - for directory in STOCK_LICENSE_DIRS + CUSTOM_LICENSE_DIRS: - path = os.path.join(directory, license_name) - if os.path.exists(path): - license_path = path - break - - if license_path: - return ReadUnknownEncodedFile(license_path, "read license") - else: - raise AssertionError("license %s could not be found in %s" - % (license_name, - '\n'.join(STOCK_LICENSE_DIRS + - CUSTOM_LICENSE_DIRS)) - ) - - @staticmethod - def EvaluateTemplate(template, env): - """Expand a template with vars like {{foo}} using a dict of expansions.""" - # TODO switch to stock python templates. - for key, val in env.iteritems(): - template = template.replace('{{%s}}' % key, val) - return template - - def _GeneratePackageLicenseText(self, pkg): - """Concatenate all licenses related to a pkg. - - This means a combination of ebuild shared licenses and licenses read from - the pkg source tree, if any. - - Args: - pkg: PackageInfo object - - Raises: - AssertionError: on runtime errors - """ - license_text = [] - for license_text_scanned in pkg.license_text_scanned: - license_text.append(license_text_scanned) - license_text.append('%s\n' % ('-=' * 40)) - - license_pointers = [] - # sln: shared license name. - for sln in pkg.license_names: - # Says whether it's a stock gentoo or custom license. - license_type = self.FindLicenseType(sln) - license_pointers.append( - "<li><a href='#%s'>%s License %s</a></li>" % ( - sln, license_type, sln)) - - # This should get caught earlier, but one extra check. - if not license_text + license_pointers: - raise AssertionError('Ended up with no license_text for %s', pkg.name) - - env = { - 'name': "%s-%s" % (pkg.name, pkg.version), - 'url': cgi.escape(pkg.homepages[0]) if pkg.homepages else '', - 'licenses_txt': cgi.escape('\n'.join(license_text)) or '', - 'licenses_ptr': '\n'.join(license_pointers) or '', - } - self.package_text[pkg] = self.EvaluateTemplate(self.entry_template, env) - - def GenerateHTMLLicenseOutput(self, output_file, - output_template=TMPL, - entry_template=ENTRY_TMPL, - license_template=SHARED_LICENSE_TMPL): - """Generate the combined html license file used in ChromeOS. - - Args: - output_file: resulting HTML license output. - output_template: template for the entire HTML file. - entry_template: template for per package entries. - license_template: template for shared license entries. - """ - self.entry_template = ReadUnknownEncodedFile(entry_template) - sorted_license_txt = [] - - # Keep track of which licenses are used by which packages. - for pkg in self.packages.values(): - if pkg.skip or pkg.licensing_failed: - continue - for sln in pkg.license_names: - self.licenses.setdefault(sln, []).append(pkg.fullnamerev) - - # Find licenses only used once, and roll them in the package that uses them. - # We use keys() because licenses is modified in the loop, so we can't use - # an iterator. - for sln in self.licenses.keys(): - if len(self.licenses[sln]) == 1: - pkg_fullnamerev = self.licenses[sln][0] - logging.info("Collapsing shared license %s into single use license " - "(only used by %s)", sln, pkg_fullnamerev) - license_type = self.FindLicenseType(sln) - license_txt = self.ReadSharedLicense(sln) - single_license = "%s License %s:\n\n%s" % (license_type, sln, - license_txt) - pkg = self.packages[pkg_fullnamerev] - pkg.license_text_scanned.append(single_license) - pkg.license_names.remove(sln) - del self.licenses[sln] - - for pkg in sorted(self.packages.values(), - key=lambda x: (x.name.lower(), x.version, x.revision)): - if pkg.skip: - logging.debug("Skipping package %s", pkg.fullnamerev) - continue - if pkg.licensing_failed: - logging.debug("Package %s failed licensing, skipping", pkg.fullnamerev) - continue - self._GeneratePackageLicenseText(pkg) - sorted_license_txt += [self.package_text[pkg]] - - # Now generate the bottom of the page that will contain all the shared - # licenses and a list of who is pointing to them. - license_template = ReadUnknownEncodedFile(license_template) - - licenses_txt = [] - for license_name in self.sorted_licenses: - env = { - 'license_name': license_name, - 'license': cgi.escape(self.ReadSharedLicense(license_name)), - 'license_type': self.FindLicenseType(license_name), - 'license_packages': ' '.join(self.LicensedPackages(license_name)), - } - licenses_txt += [self.EvaluateTemplate(license_template, env)] - - file_template = ReadUnknownEncodedFile(output_template) - env = { - 'entries': '\n'.join(sorted_license_txt), - 'licenses': '\n'.join(licenses_txt), - } - osutils.WriteFile(output_file, - self.EvaluateTemplate(file_template, env).encode('UTF-8')) - - -def ListInstalledPackages(board, all_packages=False): - """Return a list of all packages installed for a particular board.""" - - # If all_packages is set to True, all packages visible in the build - # chroot are used to generate the licensing file. This is not what you want - # for a release license file, but it's a way to run licensing checks against - # all packages. - # If it's set to False, it will only generate a licensing file that contains - # packages used for a release build (as determined by the dependencies for - # virtual/target-os). - - if all_packages: - # The following returns all packages that were part of the build tree - # (many get built or used during the build, but do not get shipped). - # Note that it also contains packages that are in the build as - # defined by build_packages but not part of the image we ship. - args = ["equery-%s" % board, "list", "*"] - packages = cros_build_lib.RunCommand(args, print_cmd=debug, - redirect_stdout=True - ).output.splitlines() - else: - # The following returns all packages that were part of the build tree - # (many get built or used during the build, but do not get shipped). - # Note that it also contains packages that are in the build as - # defined by build_packages but not part of the image we ship. - args = ["emerge-%s" % board, "--with-bdeps=y", "--usepkgonly", - "--emptytree", "--pretend", "--color=n", "virtual/target-os"] - emerge = cros_build_lib.RunCommand(args, print_cmd=debug, - redirect_stdout=True).output.splitlines() - # Another option which we've decided not to use, is bdeps=n. This outputs - # just the packages we ship, but does not packages that were used to build - # them, including a package like flex which generates a .a that is included - # and shipped in ChromeOS. - # We've decided to credit build packages, even if we're not legally required - # to (it's always nice to do), and that way we get corner case packages like - # flex. This is why we use bdep=y and not bdep=n. - - packages = [] - # [binary R ] x11-libs/libva-1.1.1 to /build/x86-alex/ - pkg_rgx = re.compile(r'\[[^]]+R[^]]+\] (.+) to /build/.*') - # If we match something else without the 'R' like - # [binary U ] chromeos-base/pepper-flash-13.0.0.133-r1 [12.0.0.77-r1] - # this is bad and we should die on this. - pkg_rgx2 = re.compile(r'(\[[^]]+\] .+) to /build/.*') - for line in emerge: - match = pkg_rgx.search(line) - match2 = pkg_rgx2.search(line) - if match: - packages.append(match.group(1)) - elif match2: - raise AssertionError("Package incorrectly installed, try eclean-%s" % - board, "\n%s" % match2.group(1)) - - return packages - - -def _HandleIllegalXMLChars(text): - """Handles illegal XML Characters. - - XML 1.0 acceptable character range: - Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | \ - [#x10000-#x10FFFF] - - This function finds all illegal characters in the text and filters - out all whitelisted characters (e.g. ^L). - - Args: - text: text to examine. - - Returns: - Filtered |text| and a list of non-whitelisted illegal characters found. - """ - whitelist_re = re.compile(u'[\x0c]') - text = whitelist_re.sub('', text) - # illegal_chars_re includes all illegal characters (whitelisted or - # not), so we can expand the whitelist without modifying this line. - illegal_chars_re = re.compile( - u'[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]') - return (text, illegal_chars_re.findall(text)) - - -def ReadUnknownEncodedFile(file_path, logging_text=None): - """Read a file of unknown encoding (UTF-8 or latin) by trying in sequence. - - Args: - file_path: what to read. - logging_text: what to display for logging depending on file read. - - Returns: - File content, possibly converted from latin1 to UTF-8. - - Raises: - Assertion error: if non-whitelisted illegal XML characters - are found in the file. - ValueError: returned if we get invalid XML. - """ - try: - with codecs.open(file_path, encoding="utf-8") as c: - file_txt = c.read() - if logging_text: - logging.info("%s %s (UTF-8)", logging_text, file_path) - except UnicodeDecodeError: - with codecs.open(file_path, encoding="latin1") as c: - file_txt = c.read() - if logging_text: - logging.info("%s %s (latin1)", logging_text, file_path) - - file_txt, char_list = _HandleIllegalXMLChars(file_txt) - - if char_list: - raise ValueError('Illegal XML characters %s found in %s.' % - (char_list, file_path)) - - return file_txt - - -def NonHookMain(opts): +# These packages exist as workarounds.... +# +# X is listed to avoid installing licensing info for all split X packages. +# sys-boot packages are listed as a partial work around for not having per-board +# credits files (TODO(dgarrett): Remove when crbug.com/197970 fixed). +EXTRA_PACKAGES = ( + ('x11-base', 'X.Org', '1.9.3', ['http://www.x.org/'], ['X']), + ('sys-kernel', 'Linux', '2.6', ['http://www.kernel.org/'], ['GPL-2']), + ('sys-boot', 'u-boot', '2013.06', ['http://www.denx.de/wiki/U-Boot'], + ['GPL-2']), + ('sys-boot', 'coreboot', '2013.04', ['http://www.coreboot.org/'], + ['GPL-2']), +) + + +def LoadPackageInfo(board, all_packages, generateMissing, packages): """Do the work when we're not called as a hook.""" - - board, all_packages, gen_licenses, output_file = ( - opts.board, opts.all_packages, opts.gen_licenses, opts.output) - packages_mode = bool(opts.package) - - if not board: - raise AssertionError("No board given (--board)") logging.info("Using board %s.", board) builddir = os.path.join(cros_build_lib.GetSysroot(board=board), 'tmp', 'portage') + if not os.path.exists(builddir): raise AssertionError( "FATAL: %s missing.\n" "Did you give the right board and build that tree?" % builddir) - if not output_file and not gen_licenses: - logging.warning("You are not generating licenses and you didn't ask for " - "output. As a result this script will do nothing useful.") - license_dir = "%s/%s/" % (cros_build_lib.GetSysroot(board), - PER_PKG_LICENSE_DIR) - if not os.path.exists(license_dir): - raise AssertionError("FATAL: %s missing.\n" % license_dir) - if gen_licenses and os.geteuid() != 0: - raise AssertionError("Run with sudo if you use --generate-licenses.") + detect_packages = not packages + if detect_packages: + # If no packages were specified, we look up the full list. + packages = licenses_lib.ListInstalledPackages(board, all_packages) - if packages_mode: - packages = opts.package - else: - packages = ListInstalledPackages(board, all_packages) if not packages: raise AssertionError('FATAL: Could not get any packages for board %s' % board) + logging.debug("Initial Package list to work through:\n%s", '\n'.join(sorted(packages))) - licensing = Licensing(board, packages, gen_licenses) + licensing = licenses_lib.Licensing(board, packages, generateMissing) + licensing.LoadPackageInfo(board) logging.debug("Package list to skip:\n%s", '\n'.join([p for p in sorted(packages) @@ -1404,66 +177,56 @@ def NonHookMain(opts): '\n'.join([p for p in sorted(packages) if not licensing.packages[p].skip])) licensing.ProcessPackageLicenses() - if not packages_mode: - # We add 2 virtual packages as well as 2 boot packages that are included - # with some hardware, but not in the image or package list. - for extra_pkg in [ - ['x11-base', 'X.Org', '1.9.3', ['http://www.x.org/'], ['X']], - ['sys-kernel', 'Linux', '2.6', ['http://www.kernel.org/'], ['GPL-2']], - ['sys-boot', 'u-boot', '2013.06', ['http://www.denx.de/wiki/U-Boot'], - ['GPL-2+']], - ['sys-boot', 'coreboot', '2013.04', ['http://www.coreboot.org/'], - ['GPL-2']], - ]: + if detect_packages: + # If we detected 'all' packages, we have to add in these extras. + for extra_pkg in EXTRA_PACKAGES: licensing.AddExtraPkg(extra_pkg) - if output_file: - licensing.GenerateHTMLLicenseOutput(output_file) - if licensing.incomplete_packages: raise AssertionError(""" -DO NOT USE OUTPUT!!! Some packages are missing due to errors, please look at errors generated during this run. List of packages with errors: %s """ % '\n'.join(licensing.incomplete_packages)) + return licensing -def main(args): - # pylint: disable=W0603 - global debug - # pylint: enable=W0603 +def main(args): parser = commandline.ArgumentParser(usage=__doc__) parser.add_argument("-b", "--board", help="which board to run for, like x86-alex") parser.add_argument("-p", "--package", action="append", default=[], + dest="packages", help="check the license of the package, e.g.," "dev-libs/libatomic_ops-7.2d") parser.add_argument("-a", "--all-packages", action="store_true", dest="all_packages", help="Run licensing against all packages in the " - "build tree") + "build tree, instead of just virtual/target-os " + "dependencies.") parser.add_argument("-g", "--generate-licenses", action="store_true", dest="gen_licenses", help="Generate licensing bits for each package before " "making license file\n(default is to use build time " "license bits)") - parser.add_argument("-k", "--hook", type="path", dest="hook", - help="Hook mode takes a single package and outputs its " - "license on stdout. Give $PORTAGE_BUILDDIR as argument.") parser.add_argument("-o", "--output", type="path", help="which html file to create with output") opts = parser.parse_args(args) - debug = opts.debug - debug = True - hook_path = opts.hook - # This get called from src/scripts/hooks/install/gen-package-licenses.sh - if hook_path: - licensing = Licensing(None, None, True) - licensing.HookPackageProcess(hook_path) - else: - NonHookMain(opts) + if not opts.board: + raise AssertionError("No board given (--board)") + + if not opts.output and not opts.gen_licenses: + raise AssertionError("You must specify --output and/or --generate-licenses") + + if opts.gen_licenses and os.geteuid() != 0: + raise AssertionError("Run with sudo if you use --generate-licenses.") + + licensing = LoadPackageInfo( + opts.board, opts.all_packages, opts.gen_licenses, opts.packages) + + if opts.output: + licensing.GenerateHTMLLicenseOutput(opts.output) diff --git a/licensing/licenses_lib.py b/licensing/licenses_lib.py new file mode 100644 index 000000000..c116814ef --- /dev/null +++ b/licensing/licenses_lib.py @@ -0,0 +1,1241 @@ +#!/usr/bin/python +# Copyright 2012 The Chromium OS Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Library for validating ebuild license information, and generating credits. + +Documentation on this script is also available here: + http://www.chromium.org/chromium-os/licensing +""" + +import cgi +import codecs +import logging +import os +import re +import tempfile + +from chromite.cbuildbot import constants +from chromite.cbuildbot import portage_utilities +from chromite.lib import cros_build_lib +from chromite.lib import osutils + +# We are imported by src/repohooks/pre-upload.py in a non chroot environment +# where yaml may not be there, so we don't error on that since it's not needed +# in that case. +try: + import yaml +except ImportError: + yaml = None + +debug = True + +# See http://crbug.com/207004 for discussion. +PER_PKG_LICENSE_DIR = '/var/db/pkg' + +STOCK_LICENSE_DIRS = [ + os.path.join(constants.SOURCE_ROOT, + 'src/third_party/portage-stable/licenses'), +] + +# There are licenses for custom software we got and isn't part of +# upstream gentoo. +CUSTOM_LICENSE_DIRS = [ + os.path.join(constants.SOURCE_ROOT, + 'src/third_party/chromiumos-overlay/licenses'), +] + +COPYRIGHT_ATTRIBUTION_DIR = ( + os.path.join( + constants.SOURCE_ROOT, + 'src/third_party/chromiumos-overlay/licenses/copyright-attribution')) + +# Virtual packages don't need to have a license and often don't, so we skip them +# chromeos-base contains google platform packages that are covered by the +# general license at top of tree, so we skip those too. +SKIPPED_CATEGORIES = [ + 'virtual', +] + +SKIPPED_PACKAGES = [ + # Fix these packages by adding a real license in the code. + # You should not skip packages just because the license scraping doesn't + # work. Stick those special cases into PACKAGE_LICENSES. + # Packages should only be here because they are sub/split packages already + # covered by the license of the main package. + + # These are Chrome-OS-specific packages, copyright BSD-Google + 'sys-kernel/chromeos-kernel', # already manually credit Linux +] + +SKIPPED_LICENSES = [ + # Some of our packages contain binary blobs for which we have special + # negotiated licenses, and no need to display anything publicly. Strongly + # consider using Google-TOS instead, if possible. + 'Proprietary-Binary', + + # If you have an early repo for which license terms have yet to be decided + # use this. It will cause licensing for the package to be mostly ignored. + # Official should error for any package with this license. + 'TAINTED', # TODO(dgarrett): Error on official builds with this license. +] + +LICENSE_NAMES_REGEX = [ + r'^copyright$', + r'^copyright[.]txt$', + r'^copyright[.]regex$', # llvm + r'^copying.*$', + r'^licen[cs]e.*$', + r'^licensing.*$', # libatomic_ops + r'^ipa_font_license_agreement_v1[.]0[.]txt$', # ja-ipafonts + r'^PKG-INFO$', # copyright assignment for + # some python packages + # (netifaces, unittest2) +] + +# These are _temporary_ license mappings for packages that do not have a valid +# shared/custom license, or LICENSE file we can use. +# Once this script runs earlier (during the package build process), it will +# block new source without a LICENSE file if the ebuild contains a license +# that requires copyright assignment (BSD and friends). +# At that point, new packages will get fixed to include LICENSE instead of +# adding workaround mappings like those below. +# The way you now fix copyright attribution cases create a custom file with the +# right license directly in COPYRIGHT_ATTRIBUTION_DIR. +PACKAGE_LICENSES = { + # TODO: replace the naive license parsing code in this script with a hook + # into portage's license parsing. See http://crbug.com/348779 + + # Chrome (the browser) is complicated, it has a morphing license that is + # either BSD-Google, or BSD-Google,Google-TOS depending on how it was + # built. We bypass this problem for now by hardcoding the Google-TOS bit as + # per ChromeOS with non free bits + 'chromeos-base/chromeos-chrome': ['BSD-Google', 'Google-TOS'], + + # Currently the code cannot parse LGPL-3 || ( LGPL-2.1 MPL-1.1 ) + 'dev-python/pycairo': ['LGPL-3', 'LGPL-2.1'], +} + +# Any license listed list here found in the ebuild will make the code look for +# license files inside the package source code in order to get copyright +# attribution from them. +COPYRIGHT_ATTRIBUTION_LICENSES = [ + 'BSD', # requires distribution of copyright notice + 'BSD-2', # so does BSD-2 http://opensource.org/licenses/BSD-2-Clause + 'BSD-3', # and BSD-3? http://opensource.org/licenses/BSD-3-Clause + 'BSD-4', # and 4? + 'BSD-with-attribution', + 'MIT', + 'MIT-with-advertising', + 'Old-MIT', +] + +# The following licenses are not invalid or to show as a less helpful stock +# license, but it's better to look in the source code for a more specific +# license if there is one, but not an error if no better one is found. +# Note that you don't want to set just anything here since any license here +# will be included once in stock form and a second time in custom form if +# found (there is no good way to know that a license we found on disk is the +# better version of the stock version, so we show both). +LOOK_IN_SOURCE_LICENSES = [ + 'as-is', # The stock license is very vague, source always has more details. + 'PSF-2', # The custom license in python is more complete than the template. + + # As far as I know, we have no requirement to do copyright attribution for + # these licenses, but the license included in the code has slightly better + # information than the stock Gentoo one (including copyright attribution). + 'BZIP2', # Single use license, do copyright attribution. + 'OFL', # Almost single use license, do copyright attribution. + 'OFL-1.1', # Almost single use license, do copyright attribution. + 'UoI-NCSA', # Only used by NSCA, might as well show their custom copyright. +] + +# This used to provide overrides. I can't find a valid reason to add any more +# here, though. +PACKAGE_HOMEPAGES = { + # Example: + # 'x11-proto/glproto': ['http://www.x.org/'], +} + +# These are tokens found in LICENSE= in an ebuild that aren't licenses we +# can actually read from disk. +# You should not use this to blacklist real licenses. +LICENCES_IGNORE = [ + ')', # Ignore OR tokens from LICENSE="|| ( LGPL-2.1 MPL-1.1 )" + '(', + '||', +] + +TMPL = 'about_credits.tmpl' +ENTRY_TMPL = 'about_credits_entry.tmpl' +SHARED_LICENSE_TMPL = 'about_credits_shared_license_entry.tmpl' + + +# This is called directly by src/repohooks/pre-upload.py +def GetLicenseTypesFromEbuild(ebuild_path): + """Returns a list of license types from the ebuild file. + + This function does not always return the correct list, but it is + faster than using portageq for not having to access chroot. It is + intended to be used for tasks such as presubmission checks. + + Args: + ebuild_path: ebuild to read. + + Returns: + list of licenses read from ebuild. + + Raises: + ValueError: ebuild errors. + """ + ebuild_env_tmpl = """ +has() { [[ " ${*:2} " == *" $1 "* ]]; } +inherit() { + local overlay_list="%(overlay_list)s" + local eclass overlay f + for eclass; do + has ${eclass} ${_INHERITED_} && continue + _INHERITED_+=" ${eclass}" + for overlay in %(overlay_list)s; do + f="${overlay}/eclass/${eclass}.eclass" + if [[ -e ${f} ]]; then + source "${f}" + break + fi + done + done +} +source %(ebuild)s""" + + # TODO: the overlay_list hard-coded here should be changed to look + # at the current overlay, and then the master overlays. E.g. for an + # ebuild file in overlay-parrot, we will look at parrot overlay + # first, and then look at portage-stable and chromiumos, which are + # listed as masters in overlay-parrot/metadata/layout.conf. + tmpl_env = { + 'ebuild': ebuild_path, + 'overlay_list': '%s %s' % ( + os.path.join(constants.SOURCE_ROOT, + 'src/third_party/chromiumos-overlay'), + os.path.join(constants.SOURCE_ROOT, + 'src/third_party/portage-stable')) + } + + with tempfile.NamedTemporaryFile(bufsize=0) as f: + osutils.WriteFile(f.name, ebuild_env_tmpl % tmpl_env) + env = osutils.SourceEnvironment( + f.name, whitelist=['LICENSE'], ifs=' ', multiline=True) + + if not env.get('LICENSE'): + raise ValueError('No LICENSE found in the ebuild.') + if re.search(r'[,;]', env['LICENSE']): + raise ValueError( + 'LICENSE field in the ebuild should be whitespace-limited.') + + return env['LICENSE'].split() + + +class PackageLicenseError(Exception): + """Thrown if something fails while getting license information for a package. + + This will cause the processing to error in the end. + """ + + +class PackageInfo(object): + """Package info containers, mostly for storing licenses.""" + + def __init__(self): + + self.board = None + self.revision = None + + # Array of scanned license texts. + self.license_text_scanned = [] + + self.category = None + self.name = None + self.version = None + + # Looks something like this + # /mnt/host/source/src/ + # third_party/portage-stable/net-misc/rsync/rsync-3.0.8.ebuild + self.ebuild_path = None + + # Array of license names retrieved from ebuild or override in this code. + self.ebuild_license_names = [] + self.homepages = [] + # This contains licenses names we can read from Gentoo or custom licenses. + # These are supposed to be shared licenses (i.e. licenses referenced by + # more then one package), but after all processing, we may find out that + # some are only used once and they get taken out of the shared pool and + # pasted directly in the sole package that was using them (see + # GenerateHTMLLicenseOutput). + self.license_names = set() + + # We set this if the ebuild has a BSD/MIT like license that requires + # scanning for a LICENSE file in the source code, or a static mapping + # in PACKAGE_LICENSES. Not finding one once this is set, is fatal. + self.need_copyright_attribution = False + # This flag just says we'd like to include licenses from the source, but + # not finding any is not fatal. + self.scan_source_for_licenses = False + + # After reading basic package information, we can mark the package as + # one to skip in licensing. + self.skip = False + + # If we failed to get licensing for this package, mark it as such so that + # it can be flagged when the full license file is being generated. + self.licensing_failed = False + + # If we are called from a hook, we grab package info from the soure tree. + # This is also used as a flag to know whether we should do package work + # based on an installed package, or one that is being built and we got + # called from the hook. + self.build_source_tree = None + + @property + def fullnamerev(self): + s = '%s-%s' % (self.fullname, self.version) + if self.revision: + s += '-r%s' % self.revision + return s + + @property + def fullname(self): + return '%s/%s' % (self.category, self.name) + + @property + def license_dump_path(self): + """e.g. /build/x86-alex//var/db/pkg/sys-apps/dtc-1.4.0/license.yaml.""" + return "%s/%s/%s/license.yaml" % (cros_build_lib.GetSysroot(self.board), + PER_PKG_LICENSE_DIR, self.fullnamerev) + + def _BuildInfo(self, filename): + filename = '%s/build-info/%s' % (self.build_source_tree, filename) + # Buildinfo properties we read are in US-ASCII, not Unicode. + try: + bi = open(filename).read().rstrip() + # Some properties like HOMEPAGE may be absent. + except IOError: + bi = "" + return bi + + def _RunEbuildPhases(self, phases): + """Run a list of ebuild phases on an ebuild. + + Args: + phases: list of phases like ['clean', 'fetch'] or ['unpack']. + + Returns: + ebuild command output + """ + + return cros_build_lib.RunCommand( + ['ebuild-%s' % self.board, self.ebuild_path] + phases, print_cmd=debug, + redirect_stdout=True) + + def _GetOverrideLicense(self): + """Look in COPYRIGHT_ATTRIBUTION_DIR for license with copyright attribution. + + For dev-util/bsdiff-4.3-r5, the code will look for + dev-util/bsdiff-4.3-r5 + dev-util/bsdiff-4.3 + dev-util/bsdiff + + It is ok to have more than one bsdiff license file, and an empty file acts + as a rubout (i.e. an empty dev-util/bsdiff-4.4 will shadow dev-util/bsdiff + and tell the licensing code to look in the package source for a license + instead of using dev-util/bsdiff as an override). + + Returns: + False (no license found) or a multiline license string. + """ + license_read = None + # dev-util/bsdiff-4.3-r5 -> bsdiff-4.3-r5 + filename = os.path.basename(self.fullnamerev) + license_path = os.path.join(COPYRIGHT_ATTRIBUTION_DIR, + os.path.dirname(self.fullnamerev)) + pv = portage_utilities.SplitPV(filename) + pv_no_rev = '%s-%s' % (pv.package, pv.version_no_rev) + for filename in (pv.pv, pv_no_rev, pv.package): + file_path = os.path.join(license_path, filename) + logging.debug("Looking for override copyright attribution license in %s", + file_path) + if os.path.exists(file_path): + # Turn + # /../merlin/trunk/src/third_party/chromiumos-overlay/../dev-util/bsdiff + # into + # chromiumos-overlay/../dev-util/bsdiff + short_dir_path = os.path.join(*file_path.rsplit(os.path.sep, 5)[1:]) + license_read = "Copyright Attribution License %s:\n\n" % short_dir_path + license_read += ReadUnknownEncodedFile( + file_path, "read copyright attribution license") + break + + return license_read + + def _ExtractLicenses(self): + """Scrounge for text licenses in the source of package we'll unpack. + + This is only called if we couldn't get usable licenses from the ebuild, + or one of them is BSD/MIT like which forces us to look for a file with + copyright attribution in the source code itself. + + First, we have a shortcut where we scan COPYRIGHT_ATTRIBUTION_DIR to see if + we find a license for this package. If so, we use that. + Typically it'll be used if the unpacked source does not have the license + that we're required to display for copyright attribution (in some cases it's + plain absent, in other cases, it could be in a filename we don't look for). + + Otherwise, we scan the unpacked source code for what looks like license + files as defined in LICENSE_NAMES_REGEX. + + Raises: + AssertionError: on runtime errors + PackageLicenseError: couldn't find copyright attribution file. + """ + license_override = self._GetOverrideLicense() + if license_override: + self.license_text_scanned = [license_override] + return + + if self.build_source_tree: + workdir = "%s/work" % self.build_source_tree + else: + self._RunEbuildPhases(['clean', 'fetch']) + output = self._RunEbuildPhases(['unpack']).output.splitlines() + # Output is spammy, it looks like this: + # * gc-7.2d.tar.gz RMD160 SHA1 SHA256 size ;-) ... [ ok ] + # * checking gc-7.2d.tar.gz ;-) ... [ ok ] + # * Running stacked hooks for pre_pkg_setup + # * sysroot_build_bin_dir ... + # [ ok ] + # * Running stacked hooks for pre_src_unpack + # * python_multilib_setup ... + # [ ok ] + # >>> Unpacking source... + # >>> Unpacking gc-7.2d.tar.gz to /build/x86-alex/tmp/po/[...]ps-7.2d/work + # >>> Source unpacked in /build/x86-alex/tmp/portage/[...]ops-7.2d/work + # So we only keep the last 2 lines, the others we don't care about. + output = [line for line in output if line[0:3] == ">>>" and + line != ">>> Unpacking source..."] + for line in output: + logging.info(line) + + args = ['portageq-%s' % self.board, 'envvar', 'PORTAGE_TMPDIR'] + result = cros_build_lib.RunCommand(args, print_cmd=debug, + redirect_stdout=True) + tmpdir = result.output.splitlines()[0] + # tmpdir gets something like /build/daisy/tmp/ + workdir = os.path.join(tmpdir, 'portage', self.fullnamerev, 'work') + + if not os.path.exists(workdir): + raise AssertionError("Unpack of %s didn't create %s. Version mismatch" % + (self.fullnamerev, workdir)) + + # You may wonder how deep should we go? + # In case of packages with sub-packages, it could be deep. + # Let's just be safe and get everything we can find. + # In the case of libatomic_ops, it's actually required to look deep + # to find the MIT license: + # dev-libs/libatomic_ops-7.2d/work/gc-7.2/libatomic_ops/doc/LICENSING.txt + args = ['find', workdir, '-type', 'f'] + result = cros_build_lib.RunCommand(args, print_cmd=debug, + redirect_stdout=True).output.splitlines() + # Truncate results to look like this: swig-2.0.4/COPYRIGHT + files = [x[len(workdir):].lstrip('/') for x in result] + license_files = [] + for name in files: + # When we scan a source tree managed by git, this can contain license + # files that are not part of the source. Exclude those. + # (e.g. .git/refs/heads/licensing) + if ".git/" in name: + continue + basename = os.path.basename(name) + # Looking for license.* brings up things like license.gpl, and we + # never want a GPL license when looking for copyright attribution, + # so we skip them here. We also skip regexes that can return + # license.py (seen in some code). + if re.search(r".*GPL.*", basename) or re.search(r"\.py$", basename): + continue + for regex in LICENSE_NAMES_REGEX: + if re.search(regex, basename, re.IGNORECASE): + license_files.append(name) + break + + if not license_files: + if self.need_copyright_attribution: + logging.error(""" +%s: unable to find usable license. +Typically this will happen because the ebuild says it's MIT or BSD, but there +was no license file that this script could find to include along with a +copyright attribution (required for BSD/MIT). + +If this is Google source, please change +LICENSE="BSD" +to +LICENSE="BSD-Google" + +If not, go investigate the unpacked source in %s, +and find which license to assign. Once you found it, you should copy that +license to a file under %s +(or you can modify LICENSE_NAMES_REGEX to pickup a license file that isn't +being scraped currently).""", + self.fullnamerev, workdir, COPYRIGHT_ATTRIBUTION_DIR) + raise PackageLicenseError() + else: + # We can get called for a license like as-is where it's preferable + # to find a better one in the source, but not fatal if we didn't. + logging.info("Was not able to find a better license for %s " + "in %s to replace the more generic one from ebuild", + self.fullnamerev, workdir) + + # Examples of multiple license matches: + # dev-lang/swig-2.0.4-r1: swig-2.0.4/COPYRIGHT swig-2.0.4/LICENSE + # dev-libs/glib-2.32.4-r1: glib-2.32.4/COPYING pkg-config-0.26/COPYING + # dev-libs/libnl-3.2.14: libnl-doc-3.2.14/COPYING libnl-3.2.14/COPYING + # dev-libs/libpcre-8.30-r2: pcre-8.30/LICENCE pcre-8.30/COPYING + # dev-libs/libusb-0.1.12-r6: libusb-0.1.12/COPYING libusb-0.1.12/LICENSE + # dev-libs/pyzy-0.1.0-r1: db/COPYING pyzy-0.1.0/COPYING + # net-misc/strongswan-5.0.2-r4: strongswan-5.0.2/COPYING + # strongswan-5.0.2/LICENSE + # sys-process/procps-3.2.8_p11: debian/copyright procps-3.2.8/COPYING + logging.info('License(s) for %s: %s', self.fullnamerev, + ' '.join(license_files)) + for license_file in sorted(license_files): + # Joy and pink ponies. Some license_files are encoded as latin1 while + # others are utf-8 and of course you can't know but only guess. + license_path = os.path.join(workdir, license_file) + license_txt = ReadUnknownEncodedFile(license_path, "Adding License") + + self.license_text_scanned += [ + "Scanned Source License %s:\n\n%s" % (license_file, license_txt)] + + # We used to clean up here, but there have been many instances where + # looking at unpacked source to see where the licenses were, was useful + # so let's disable this for now + # self._RunEbuildPhases(['clean']) + + def GetPackageInfo(self, fullnamewithrev): + """Populate PackageInfo with package license, and homepage. + + self.ebuild_license_names will not be filled if the package is skipped + or if there was an issue getting data from the ebuild. + self.license_names will only get the licenses that we can paste + as shared licenses. + scan_source_for_licenses will be set if we should unpack the source to look + for licenses + if need_copyright_attribution is also set, not finding a license in the + source is fatal (PackageLicenseError will get raised). + + Args: + fullnamewithrev: e.g. dev-libs/libatomic_ops-7.2d + + Raises: + AssertionError: on runtime errors + """ + if not fullnamewithrev: + if not self.build_source_tree: + raise AssertionError("Cannot continue without full name or source tree") + fullnamewithrev = "%s/%s" % (self._BuildInfo("CATEGORY"), + self._BuildInfo("PF")) + logging.debug("Computed package name %s from %s", fullnamewithrev, + self.build_source_tree) + + try: + cpv = portage_utilities.SplitCPV(fullnamewithrev) + # A bad package can either raise a TypeError exception or return None, + # so we catch both cases. + if not cpv: + raise TypeError + except TypeError: + raise AssertionError("portage couldn't find %s, missing version number?" % + fullnamewithrev) + + self.category, self.name, self.version, self.revision = ( + cpv.category, cpv.package, cpv.version_no_rev, cpv.rev) + + if self.revision is not None: + self.revision = str(self.revision).lstrip('r') + if self.revision == '0': + self.revision = None + + if self.category in SKIPPED_CATEGORIES: + logging.info("%s in SKIPPED_CATEGORIES, skip package", self.fullname) + self.skip = True + return + + if self.fullname in SKIPPED_PACKAGES: + logging.info("%s in SKIPPED_PACKAGES, skip package", self.fullname) + self.skip = True + return + + def _FindEbuildPath(self): + """Populate package info from an ebuild retrieved via equery.""" + # By default, equery returns the latest version of the package. A + # build may have used an older version than what is currently + # available in the source tree (a build dependency can be pinned + # to an older version of a package for compatibility + # reasons). Therefore we need to tell equery that we want the + # exact version number used in the image build as opposed to the + # latest available in the source tree. + args = ['equery-%s' % self.board, '-q', '-C', 'which', self.fullnamerev] + try: + path = cros_build_lib.RunCommand(args, print_cmd=True, + redirect_stdout=True).output.strip() + if not path: + raise AssertionError + except: + raise AssertionError('GetEbuildPath for %s failed.\n' + 'Is your tree clean? Delete %s and rebuild' % + (self.name, + cros_build_lib.GetSysroot(board=self.board))) + logging.debug("%s -> %s", " ".join(args), path) + + if not os.access(path, os.F_OK): + raise AssertionError("Can't access %s", path) + + self.ebuild_path = path + + def _ReadEbuildMetadata(self): + """Read package metadata retrieved via portageq.""" + args = ['portageq-%s' % self.board, 'metadata', + cros_build_lib.GetSysroot(board=self.board), 'ebuild', + self.fullnamerev, 'HOMEPAGE', 'LICENSE'] + tmp = cros_build_lib.RunCommand(args, print_cmd=debug, + redirect_stdout=True) + lines = tmp.output.splitlines() + # Runs: + # portageq metadata /build/x86-alex ebuild net-misc/wget-1.12-r2 \ + # HOMEPAGE LICENSE + # Returns: + # http://www.gnu.org/software/wget/ + # GPL-3 + self.homepages, self.ebuild_license_names = ( + lines[0].split(), lines[1].split()) + + def _TestEbuildContents(self): + """Discover if the ebuild installed any files. + + Returns: + bool which tells if any files were installed. + """ + # Search for anything the ebuild might install, other than a directory. + args = ['equery-%s' % self.board, '-q', '-C', 'files', self.fullnamerev, + '-f', 'obj'] + tmp = cros_build_lib.RunCommand(args, print_cmd=debug, redirect_stdout=True) + lines = tmp.output.splitlines() + + # lines is an array of the file names installed by the ebuild. + return bool(lines) + + def GetLicenses(self): + """Get licenses from the ebuild field and the unpacked source code. + + Some packages have static license mappings applied to them that get + retrieved from the ebuild. + + For others, we figure out whether the package source should be scanned to + add licenses found there. + + Raises: + AssertionError: on runtime errors + PackageLicenseError: couldn't find license in ebuild and source. + """ + if self.build_source_tree: + # If the total size installed is zero, we installed no content to license. + if self._BuildInfo("SIZE").strip() == '0': + self.skip = True + return + self.homepages = self._BuildInfo("HOMEPAGE").split() + self.ebuild_license_names = self._BuildInfo("LICENSE").split() + else: + self._FindEbuildPath() + self._ReadEbuildMetadata() + self.skip = self.skip or not self._TestEbuildContents() + + # If this ebuild only uses skipped licenses, skip it. + if (self.ebuild_license_names and + all(l in SKIPPED_LICENSES for l in self.ebuild_license_names)): + self.skip = True + + if self.skip: + return + + if self.fullname in PACKAGE_HOMEPAGES: + self.homepages = PACKAGE_HOMEPAGES[self.fullname] + + # Packages with missing licenses or licenses that need mapping (like + # BSD/MIT) are hardcoded here: + if self.fullname in PACKAGE_LICENSES: + self.ebuild_license_names = PACKAGE_LICENSES[self.fullname] + logging.info("Static license mapping for %s: %s", self.fullnamerev, + ",".join(self.ebuild_license_names)) + else: + logging.info("Read licenses for %s: %s", self.fullnamerev, + ",".join(self.ebuild_license_names)) + + # Lots of packages in chromeos-base have their license set to BSD instead + # of BSD-Google: + new_license_names = [] + for license_name in self.ebuild_license_names: + # TODO: temp workaround for http;//crbug.com/348750 , remove when the bug + # is fixed. + if (license_name == "BSD" and + self.fullnamerev.startswith("chromeos-base/")): + license_name = "BSD-Google" + logging.error( + "Fixed BSD->BSD-Google for %s because it's in chromeos-base. " + "Please fix the LICENSE field in the ebuild", self.fullnamerev) + # TODO: temp workaround for http;//crbug.com/348749 , remove when the bug + # is fixed. + if license_name == "Proprietary": + license_name = "Google-TOS" + logging.error( + "Fixed Proprietary -> Google-TOS for %s. " + "Please fix the LICENSE field in the ebuild", self.fullnamerev) + new_license_names.append(license_name) + self.ebuild_license_names = new_license_names + + # The ebuild license field can look like: + # LICENSE="GPL-3 LGPL-3 Apache-2.0" (this means AND, as in all 3) + # for third_party/portage-stable/app-admin/rsyslog/rsyslog-5.8.11.ebuild + # LICENSE="|| ( LGPL-2.1 MPL-1.1 )" + # for third_party/portage-stable/x11-libs/cairo/cairo-1.8.8.ebuild + + # The parser isn't very smart and only has basic support for the + # || ( X Y ) OR logic to do the following: + # In order to save time needlessly unpacking packages and looking or a + # cleartext license (which is really a crapshoot), if we have a license + # like BSD that requires looking for copyright attribution, but we can + # chose another license like GPL, we do that. + + if not self.skip and not self.ebuild_license_names: + logging.error("%s: no license found in ebuild. FIXME!", self.fullnamerev) + # In a bind, you could comment this out. I'm making the output fail to + # get your attention since this error really should be fixed, but if you + # comment out the next line, the script will try to find a license inside + # the source. + raise PackageLicenseError() + + # This is not invalid, but the parser can't deal with it, so if it ever + # happens, error out to tell the programmer to do something. + # dev-python/pycairo-1.10.0-r4: LGPL-3 || ( LGPL-2.1 MPL-1.1 ) + if "||" in self.ebuild_license_names[1:]: + logging.error("%s: Can't parse || in the middle of a license: %s", + self.fullnamerev, ' '.join(self.ebuild_license_names)) + raise PackageLicenseError() + + or_licenses_and_one_is_no_attribution = False + # We do a quick early pass first so that the longer pass below can + # run accordingly. + for license_name in [x for x in self.ebuild_license_names + if x not in LICENCES_IGNORE]: + # Here we have an OR case, and one license that we can use stock, so + # we remember that in order to be able to skip license attributions if + # any were in the OR. + if (self.ebuild_license_names[0] == "||" and + license_name not in COPYRIGHT_ATTRIBUTION_LICENSES): + or_licenses_and_one_is_no_attribution = True + + for license_name in [x for x in self.ebuild_license_names + if x not in LICENCES_IGNORE]: + # Licenses like BSD or MIT can't be used as is because they do not contain + # copyright self. They have to be replaced by copyright file given in the + # source code, or manually mapped by us in PACKAGE_LICENSES + if license_name in COPYRIGHT_ATTRIBUTION_LICENSES: + # To limit needless efforts, if a package is BSD or GPL, we ignore BSD + # and use GPL to avoid scanning the package, but we can only do this if + # or_licenses_and_one_is_no_attribution has been set above. + # This ensures that if we have License: || (BSD3 BSD4), we will + # look in the source. + if or_licenses_and_one_is_no_attribution: + logging.info("%s: ignore license %s because ebuild LICENSES had %s", + self.fullnamerev, license_name, + ' '.join(self.ebuild_license_names)) + else: + logging.info("%s: can't use %s, will scan source code for copyright", + self.fullnamerev, license_name) + self.need_copyright_attribution = True + self.scan_source_for_licenses = True + else: + self.license_names.add(license_name) + # We can't display just 2+ because it only contains text that says to + # read v2 or v3. + if license_name == 'GPL-2+': + self.license_names.add('GPL-2') + if license_name == 'LGPL-2+': + self.license_names.add('LGPL-2') + + if license_name in LOOK_IN_SOURCE_LICENSES: + logging.info("%s: Got %s, will try to find better license in source...", + self.fullnamerev, license_name) + self.scan_source_for_licenses = True + + if self.license_names: + logging.info('%s: using stock|cust license(s) %s', + self.fullnamerev, ','.join(self.license_names)) + + # If the license(s) could not be found, or one requires copyright + # attribution, dig in the source code for license files: + # For instance: + # Read licenses from ebuild for net-dialup/ppp-2.4.5-r3: BSD,GPL-2 + # We need get the substitution file for BSD and add it to GPL. + if self.scan_source_for_licenses: + self._ExtractLicenses() + + # This shouldn't run, but leaving as sanity check. + if not self.license_names and not self.license_text_scanned: + raise AssertionError("Didn't find usable licenses for %s" % + self.fullnamerev) + + +class Licensing(object): + """Do the actual work of extracting licensing info and outputting html.""" + + def __init__(self, board, package_fullnames, gen_licenses): + # eg x86-alex + self.board = board + # List of stock and custom licenses referenced in ebuilds. Used to + # print a report. Dict value says which packages use that license. + self.licenses = {} + + # Licenses are supposed to be generated at package build time and be + # ready for us, but in case they're not, they can be generated. + self.gen_licenses = gen_licenses + + # This keeps track of whether we have an incomplete license file due to + # package errors during parsing. + # Any non empty list at the end shows the list of packages that caused + # errors. + self.incomplete_packages = [] + + self.package_text = {} + self.entry_template = None + + # We need to have a dict for the list of packages objects, index by package + # fullnamerev, so that when we scan our licenses at the end, and find out + # some shared licenses are only used by one package, we can access that + # package object by name, and add the license directly in that object. + self.packages = {} + self._package_fullnames = package_fullnames + + @property + def sorted_licenses(self): + return sorted(self.licenses.keys(), key=str.lower) + + def _SaveLicenseDump(self, pkg): + if pkg.build_source_tree: + save_file = "%s/build-info/license.yaml" % pkg.build_source_tree + else: + save_file = pkg.license_dump_path + logging.debug("Saving license to %s", save_file) + save_dir = os.path.dirname(save_file) + if not os.path.isdir(save_dir): + os.makedirs(save_dir, 0755) + with open(save_file, "w") as f: + yaml_dump = [] + for key, value in pkg.__dict__.items(): + yaml_dump.append([key, value]) + f.write(yaml.dump(yaml_dump)) + + def _LoadLicenseDump(self, pkg): + save_file = pkg.license_dump_path + logging.debug("Getting license from %s for %s", save_file, pkg.name) + with open(save_file, "r") as f: + # yaml.safe_load barfs on unicode it output, but we don't really need it. + yaml_dump = yaml.load(f) + for key, value in yaml_dump: + pkg.__dict__[key] = value + + def LicensedPackages(self, license_name): + """Return list of packages using a given license.""" + return self.licenses[license_name] + + def LoadPackageInfo(self, board): + """Populate basic package info for all packages from their ebuild.""" + for package_name in self._package_fullnames: + pkg = PackageInfo() + pkg.board = board + pkg.GetPackageInfo(package_name) + self.packages[package_name] = pkg + + def HookPackageProcess(self, pkg_build_path): + """Different entry point to populate a packageinfo. + + This is called instead of LoadPackageInfo when called by a package build. + + Args: + pkg_build_path: unpacked being built by emerge. + """ + pkg = PackageInfo() + pkg.build_source_tree = pkg_build_path + pkg.GetPackageInfo(None) + if not pkg.skip: + pkg.GetLicenses() + self._SaveLicenseDump(pkg) + + def ProcessPackageLicenses(self): + """Iterate through all packages provided and gather their licenses. + + GetLicenses will scrape licenses from the code and/or gather stock license + names. We gather the list of stock and custom ones for later processing. + + Do not call this after adding virtual packages with AddExtraPkg. + """ + for package_name in self.packages: + pkg = self.packages[package_name] + if pkg.skip: + if self.gen_licenses: + logging.info("Package %s is in skip list", package_name) + else: + # If we do a licensing run expecting to get licensing objects from + # an image build, virtual packages will be missing such objects + # because virtual packages do not get the install hook run at build + # time. Because this script may not have permissions to write in the + # /var/db/ directory, we don't want it to generate useless license + # bits for virtual packages. As a result, ignore virtual packages + # here. + if pkg.category == "virtual": + logging.debug("Ignoring %s virtual package", package_name) + continue + + # Other skipped packages get dumped with incomplete info and the skip flag + if not os.path.exists(pkg.license_dump_path) and not self.gen_licenses: + logging.warning(">>> License for %s is missing, creating now <<<", + package_name) + if not os.path.exists(pkg.license_dump_path) or self.gen_licenses: + if not pkg.skip: + try: + pkg.GetLicenses() + except PackageLicenseError: + pkg.licensing_failed = True + # We dump packages where licensing failed too. + self._SaveLicenseDump(pkg) + + # To debug the code, we force the data to be re-read from the dumps + # instead of reusing what we may have in memory. + for package_name in self.packages: + pkg = self.packages[package_name] + if pkg.category == "virtual": + continue + + self._LoadLicenseDump(pkg) + logging.debug("loaded dump for %s", pkg.fullnamerev) + if pkg.skip: + logging.info("Package %s is in skip list", pkg.fullnamerev) + if pkg.licensing_failed: + logging.info("Package %s failed licensing", pkg.fullnamerev) + self.incomplete_packages += [pkg.fullnamerev] + + def AddExtraPkg(self, pkg_data): + """Allow adding pre-created virtual packages. + + GetLicenses will not work on them, so add them after having run + ProcessPackages. + + Args: + pkg_data: array of package data as defined below + """ + pkg = PackageInfo() + pkg.board = self.board + pkg.category = pkg_data[0] + pkg.name = pkg_data[1] + pkg.version = pkg_data[2] + pkg.homepages = pkg_data[3] # this is a list + pkg.license_names = pkg_data[4] # this is also a list + pkg.ebuild_license_names = pkg_data[4] + self.packages[pkg.fullnamerev] = pkg + + # Called directly by src/repohooks/pre-upload.py + @staticmethod + def FindLicenseType(license_name): + """Says if a license is stock Gentoo, custom, or doesn't exist.""" + + for directory in STOCK_LICENSE_DIRS: + path = '%s/%s' % (directory, license_name) + if os.path.exists(path): + return "Gentoo Package Stock" + + for directory in CUSTOM_LICENSE_DIRS: + path = '%s/%s' % (directory, license_name) + if os.path.exists(path): + return "Custom" + + if license_name in SKIPPED_LICENSES: + return "Custom" + + raise AssertionError(""" +license %s could not be found in %s +If the license in the ebuild is correct, +a) a stock license should be added to portage-stable/licenses : +running `cros_portage_upgrade` inside of the chroot should clone this repo +to /tmp/portage/: +https://chromium.googlesource.com/chromiumos/overlays/portage/+/gentoo +find the new licenses under licenses, and add them to portage-stable/licenses + +b) if it's a non gentoo package with a custom license, you can copy that license +to third_party/chromiumos-overlay/licenses/ + +Try re-running the script with -p cat/package-ver --generate +after fixing the license.""" % + (license_name, + '\n'.join(STOCK_LICENSE_DIRS + CUSTOM_LICENSE_DIRS)) + ) + + @staticmethod + def ReadSharedLicense(license_name): + """Read and return stock or cust license file specified in an ebuild.""" + + license_path = None + for directory in STOCK_LICENSE_DIRS + CUSTOM_LICENSE_DIRS: + path = os.path.join(directory, license_name) + if os.path.exists(path): + license_path = path + break + + if license_path: + return ReadUnknownEncodedFile(license_path, "read license") + else: + raise AssertionError("license %s could not be found in %s" + % (license_name, + '\n'.join(STOCK_LICENSE_DIRS + + CUSTOM_LICENSE_DIRS)) + ) + + @staticmethod + def EvaluateTemplate(template, env): + """Expand a template with vars like {{foo}} using a dict of expansions.""" + # TODO switch to stock python templates. + for key, val in env.iteritems(): + template = template.replace('{{%s}}' % key, val) + return template + + def _GeneratePackageLicenseText(self, pkg): + """Concatenate all licenses related to a pkg. + + This means a combination of ebuild shared licenses and licenses read from + the pkg source tree, if any. + + Args: + pkg: PackageInfo object + + Raises: + AssertionError: on runtime errors + """ + license_text = [] + for license_text_scanned in pkg.license_text_scanned: + license_text.append(license_text_scanned) + license_text.append('%s\n' % ('-=' * 40)) + + license_pointers = [] + # sln: shared license name. + for sln in pkg.license_names: + # Says whether it's a stock gentoo or custom license. + license_type = self.FindLicenseType(sln) + license_pointers.append( + "<li><a href='#%s'>%s License %s</a></li>" % ( + sln, license_type, sln)) + + # This should get caught earlier, but one extra check. + if not license_text + license_pointers: + raise AssertionError('Ended up with no license_text for %s', pkg.name) + + env = { + 'name': "%s-%s" % (pkg.name, pkg.version), + 'url': cgi.escape(pkg.homepages[0]) if pkg.homepages else '', + 'licenses_txt': cgi.escape('\n'.join(license_text)) or '', + 'licenses_ptr': '\n'.join(license_pointers) or '', + } + self.package_text[pkg] = self.EvaluateTemplate(self.entry_template, env) + + def GenerateHTMLLicenseOutput(self, output_file, + output_template=TMPL, + entry_template=ENTRY_TMPL, + license_template=SHARED_LICENSE_TMPL): + """Generate the combined html license file used in ChromeOS. + + Args: + output_file: resulting HTML license output. + output_template: template for the entire HTML file. + entry_template: template for per package entries. + license_template: template for shared license entries. + """ + self.entry_template = ReadUnknownEncodedFile(entry_template) + sorted_license_txt = [] + + # Keep track of which licenses are used by which packages. + for pkg in self.packages.values(): + if pkg.skip or pkg.licensing_failed: + continue + for sln in pkg.license_names: + self.licenses.setdefault(sln, []).append(pkg.fullnamerev) + + # Find licenses only used once, and roll them in the package that uses them. + # We use keys() because licenses is modified in the loop, so we can't use + # an iterator. + for sln in self.licenses.keys(): + if len(self.licenses[sln]) == 1: + pkg_fullnamerev = self.licenses[sln][0] + logging.info("Collapsing shared license %s into single use license " + "(only used by %s)", sln, pkg_fullnamerev) + license_type = self.FindLicenseType(sln) + license_txt = self.ReadSharedLicense(sln) + single_license = "%s License %s:\n\n%s" % (license_type, sln, + license_txt) + pkg = self.packages[pkg_fullnamerev] + pkg.license_text_scanned.append(single_license) + pkg.license_names.remove(sln) + del self.licenses[sln] + + for pkg in sorted(self.packages.values(), + key=lambda x: (x.name.lower(), x.version, x.revision)): + if pkg.skip: + logging.debug("Skipping package %s", pkg.fullnamerev) + continue + if pkg.licensing_failed: + logging.debug("Package %s failed licensing, skipping", pkg.fullnamerev) + continue + self._GeneratePackageLicenseText(pkg) + sorted_license_txt += [self.package_text[pkg]] + + # Now generate the bottom of the page that will contain all the shared + # licenses and a list of who is pointing to them. + license_template = ReadUnknownEncodedFile(license_template) + + licenses_txt = [] + for license_name in self.sorted_licenses: + env = { + 'license_name': license_name, + 'license': cgi.escape(self.ReadSharedLicense(license_name)), + 'license_type': self.FindLicenseType(license_name), + 'license_packages': ' '.join(self.LicensedPackages(license_name)), + } + licenses_txt += [self.EvaluateTemplate(license_template, env)] + + file_template = ReadUnknownEncodedFile(output_template) + env = { + 'entries': '\n'.join(sorted_license_txt), + 'licenses': '\n'.join(licenses_txt), + } + osutils.WriteFile(output_file, + self.EvaluateTemplate(file_template, env).encode('UTF-8')) + + +def ListInstalledPackages(board, all_packages=False): + """Return a list of all packages installed for a particular board.""" + + # If all_packages is set to True, all packages visible in the build + # chroot are used to generate the licensing file. This is not what you want + # for a release license file, but it's a way to run licensing checks against + # all packages. + # If it's set to False, it will only generate a licensing file that contains + # packages used for a release build (as determined by the dependencies for + # virtual/target-os). + + if all_packages: + # The following returns all packages that were part of the build tree + # (many get built or used during the build, but do not get shipped). + # Note that it also contains packages that are in the build as + # defined by build_packages but not part of the image we ship. + args = ["equery-%s" % board, "list", "*"] + packages = cros_build_lib.RunCommand(args, print_cmd=debug, + redirect_stdout=True + ).output.splitlines() + else: + # The following returns all packages that were part of the build tree + # (many get built or used during the build, but do not get shipped). + # Note that it also contains packages that are in the build as + # defined by build_packages but not part of the image we ship. + args = ["emerge-%s" % board, "--with-bdeps=y", "--usepkgonly", + "--emptytree", "--pretend", "--color=n", "virtual/target-os"] + emerge = cros_build_lib.RunCommand(args, print_cmd=debug, + redirect_stdout=True).output.splitlines() + # Another option which we've decided not to use, is bdeps=n. This outputs + # just the packages we ship, but does not packages that were used to build + # them, including a package like flex which generates a .a that is included + # and shipped in ChromeOS. + # We've decided to credit build packages, even if we're not legally required + # to (it's always nice to do), and that way we get corner case packages like + # flex. This is why we use bdep=y and not bdep=n. + + packages = [] + # [binary R ] x11-libs/libva-1.1.1 to /build/x86-alex/ + pkg_rgx = re.compile(r'\[[^]]+R[^]]+\] (.+) to /build/.*') + # If we match something else without the 'R' like + # [binary U ] chromeos-base/pepper-flash-13.0.0.133-r1 [12.0.0.77-r1] + # this is bad and we should die on this. + pkg_rgx2 = re.compile(r'(\[[^]]+\] .+) to /build/.*') + for line in emerge: + match = pkg_rgx.search(line) + match2 = pkg_rgx2.search(line) + if match: + packages.append(match.group(1)) + elif match2: + raise AssertionError("Package incorrectly installed, try eclean-%s" % + board, "\n%s" % match2.group(1)) + + return packages + + +def _HandleIllegalXMLChars(text): + """Handles illegal XML Characters. + + XML 1.0 acceptable character range: + Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | \ + [#x10000-#x10FFFF] + + This function finds all illegal characters in the text and filters + out all whitelisted characters (e.g. ^L). + + Args: + text: text to examine. + + Returns: + Filtered |text| and a list of non-whitelisted illegal characters found. + """ + whitelist_re = re.compile(u'[\x0c]') + text = whitelist_re.sub('', text) + # illegal_chars_re includes all illegal characters (whitelisted or + # not), so we can expand the whitelist without modifying this line. + illegal_chars_re = re.compile( + u'[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]') + return (text, illegal_chars_re.findall(text)) + + +def ReadUnknownEncodedFile(file_path, logging_text=None): + """Read a file of unknown encoding (UTF-8 or latin) by trying in sequence. + + Args: + file_path: what to read. + logging_text: what to display for logging depending on file read. + + Returns: + File content, possibly converted from latin1 to UTF-8. + + Raises: + Assertion error: if non-whitelisted illegal XML characters + are found in the file. + ValueError: returned if we get invalid XML. + """ + try: + with codecs.open(file_path, encoding="utf-8") as c: + file_txt = c.read() + if logging_text: + logging.info("%s %s (UTF-8)", logging_text, file_path) + except UnicodeDecodeError: + with codecs.open(file_path, encoding="latin1") as c: + file_txt = c.read() + if logging_text: + logging.info("%s %s (latin1)", logging_text, file_path) + + file_txt, char_list = _HandleIllegalXMLChars(file_txt) + + if char_list: + raise ValueError('Illegal XML characters %s found in %s.' % + (char_list, file_path)) + + return file_txt |