summaryrefslogtreecommitdiff
path: root/licensing
diff options
context:
space:
mode:
authorDon Garrett <dgarrett@google.com>2014-08-12 14:56:01 -0700
committerchrome-internal-fetch <chrome-internal-fetch@google.com>2014-08-16 05:22:15 +0000
commitc2fd63c7a3c315fe4a4eae17222085ac5f84ca04 (patch)
tree0809168b035828234ccdfde212ce14fc985457c4 /licensing
parent07ac932c5dbaaa0fc1de3db186669a5bf805ccd6 (diff)
downloadchromite-c2fd63c7a3c315fe4a4eae17222085ac5f84ca04.tar.gz
licensing: Split licenses.py -> licenses_lib.py and licenses.py.
Turn license into a standalone script that uses a helper library to do its work. Split up its 'main' method into distinct stages. Also, fix nits in hook script from a previous CL. BUG=None TEST=cros lint + manual testing. CQ-DEPEND=CL:212124 Change-Id: Ic81d50260a6b7603904aabc38b3877ad59abb2b7 Reviewed-on: https://chromium-review.googlesource.com/212137 Tested-by: Don Garrett <dgarrett@chromium.org> Reviewed-by: Don Garrett <dgarrett@chromium.org> Commit-Queue: Don Garrett <dgarrett@chromium.org>
Diffstat (limited to 'licensing')
-rw-r--r--licensing/ebuild_license_hook.py15
-rw-r--r--licensing/licenses.py1333
-rw-r--r--licensing/licenses_lib.py1241
3 files changed, 1296 insertions, 1293 deletions
diff --git a/licensing/ebuild_license_hook.py b/licensing/ebuild_license_hook.py
index 41e33d602..07fb7c12b 100644
--- a/licensing/ebuild_license_hook.py
+++ b/licensing/ebuild_license_hook.py
@@ -1,9 +1,8 @@
#!/usr/bin/python
-#
-# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
+# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
-#
+
"""Emerge hook to pre-parse and verify license information.
Called from src/scripts/hooks/install/gen-package-licenses.sh as part of a
@@ -12,16 +11,16 @@ package emerge.
from chromite.lib import commandline
-from chromite.licensing import licenses
+from chromite.licensing import licenses_lib
def main(args):
parser = commandline.ArgumentParser(usage=__doc__)
- parser.add_argument("--builddir", type="path", dest="builddir",
- help="Take $PORTAGE_BUILDDIR as argument.")
+ parser.add_argument('--builddir', type='path', dest='builddir',
+ help='Take $PORTAGE_BUILDDIR as argument.')
opts = parser.parse_args(args)
+ opts.Freeze()
-
- licensing = licenses.Licensing(None, None, True)
+ licensing = licenses_lib.Licensing(None, None, True)
licensing.HookPackageProcess(opts.builddir)
diff --git a/licensing/licenses.py b/licensing/licenses.py
index a9e548cec..cf279780a 100644
--- a/licensing/licenses.py
+++ b/licensing/licenses.py
@@ -120,1282 +120,55 @@ the file in /opt/google/chrome/resources/about_os_credits.html or as defined
in http://crbug.com/271832 .
"""
-import cgi
-import codecs
+
import logging
import os
-import re
-import tempfile
-from chromite.cbuildbot import constants
-from chromite.cbuildbot import portage_utilities
from chromite.lib import commandline
from chromite.lib import cros_build_lib
-from chromite.lib import osutils
-
-# We are imported by src/repohooks/pre-upload.py in a non chroot environment
-# where yaml may not be there, so we don't error on that since it's not needed
-# in that case.
-try:
- import yaml
-except ImportError:
- yaml = None
-
-debug = False
-
-# See http://crbug.com/207004 for discussion.
-PER_PKG_LICENSE_DIR = '/var/db/pkg'
-
-STOCK_LICENSE_DIRS = [
- os.path.join(constants.SOURCE_ROOT,
- 'src/third_party/portage-stable/licenses'),
-]
-
-# There are licenses for custom software we got and isn't part of
-# upstream gentoo.
-CUSTOM_LICENSE_DIRS = [
- os.path.join(constants.SOURCE_ROOT,
- 'src/third_party/chromiumos-overlay/licenses'),
-]
-
-COPYRIGHT_ATTRIBUTION_DIR = (
- os.path.join(
- constants.SOURCE_ROOT,
- 'src/third_party/chromiumos-overlay/licenses/copyright-attribution'))
-
-# Virtual packages don't need to have a license and often don't, so we skip them
-# chromeos-base contains google platform packages that are covered by the
-# general license at top of tree, so we skip those too.
-SKIPPED_CATEGORIES = [
- 'virtual',
-]
-
-SKIPPED_PACKAGES = [
- # Fix these packages by adding a real license in the code.
- # You should not skip packages just because the license scraping doesn't
- # work. Stick those special cases into PACKAGE_LICENSES.
- # Packages should only be here because they are sub/split packages already
- # covered by the license of the main package.
-
- # These are Chrome-OS-specific packages, copyright BSD-Google
- 'sys-kernel/chromeos-kernel', # already manually credit Linux
-]
-
-SKIPPED_LICENSES = [
- # Some of our packages contain binary blobs for which we have special
- # negotiated licenses, and no need to display anything publicly. Strongly
- # consider using Google-TOS instead, if possible.
- 'Proprietary-Binary',
-
- # If you have an early repo for which license terms have yet to be decided
- # use this. It will cause licensing for the package to be mostly ignored.
- # Official should error for any package with this license.
- 'TAINTED', # TODO(dgarrett): Error on official builds with this license.
-]
-
-LICENSE_NAMES_REGEX = [
- r'^copyright$',
- r'^copyright[.]txt$',
- r'^copyright[.]regex$', # llvm
- r'^copying.*$',
- r'^licen[cs]e.*$',
- r'^licensing.*$', # libatomic_ops
- r'^ipa_font_license_agreement_v1[.]0[.]txt$', # ja-ipafonts
- r'^PKG-INFO$', # copyright assignment for
- # some python packages
- # (netifaces, unittest2)
-]
-
-# These are _temporary_ license mappings for packages that do not have a valid
-# shared/custom license, or LICENSE file we can use.
-# Once this script runs earlier (during the package build process), it will
-# block new source without a LICENSE file if the ebuild contains a license
-# that requires copyright assignment (BSD and friends).
-# At that point, new packages will get fixed to include LICENSE instead of
-# adding workaround mappings like those below.
-# The way you now fix copyright attribution cases create a custom file with the
-# right license directly in COPYRIGHT_ATTRIBUTION_DIR.
-PACKAGE_LICENSES = {
- # TODO: replace the naive license parsing code in this script with a hook
- # into portage's license parsing. See http://crbug.com/348779
-
- # Chrome (the browser) is complicated, it has a morphing license that is
- # either BSD-Google, or BSD-Google,Google-TOS depending on how it was
- # built. We bypass this problem for now by hardcoding the Google-TOS bit as
- # per ChromeOS with non free bits
- 'chromeos-base/chromeos-chrome': ['BSD-Google', 'Google-TOS'],
-
- # Currently the code cannot parse LGPL-3 || ( LGPL-2.1 MPL-1.1 )
- 'dev-python/pycairo': ['LGPL-3', 'LGPL-2.1'],
-}
-
-# Any license listed list here found in the ebuild will make the code look for
-# license files inside the package source code in order to get copyright
-# attribution from them.
-COPYRIGHT_ATTRIBUTION_LICENSES = [
- 'BSD', # requires distribution of copyright notice
- 'BSD-2', # so does BSD-2 http://opensource.org/licenses/BSD-2-Clause
- 'BSD-3', # and BSD-3? http://opensource.org/licenses/BSD-3-Clause
- 'BSD-4', # and 4?
- 'BSD-with-attribution',
- 'MIT',
- 'MIT-with-advertising',
- 'Old-MIT',
-]
-
-# The following licenses are not invalid or to show as a less helpful stock
-# license, but it's better to look in the source code for a more specific
-# license if there is one, but not an error if no better one is found.
-# Note that you don't want to set just anything here since any license here
-# will be included once in stock form and a second time in custom form if
-# found (there is no good way to know that a license we found on disk is the
-# better version of the stock version, so we show both).
-LOOK_IN_SOURCE_LICENSES = [
- 'as-is', # The stock license is very vague, source always has more details.
- 'PSF-2', # The custom license in python is more complete than the template.
-
- # As far as I know, we have no requirement to do copyright attribution for
- # these licenses, but the license included in the code has slightly better
- # information than the stock Gentoo one (including copyright attribution).
- 'BZIP2', # Single use license, do copyright attribution.
- 'OFL', # Almost single use license, do copyright attribution.
- 'OFL-1.1', # Almost single use license, do copyright attribution.
- 'UoI-NCSA', # Only used by NSCA, might as well show their custom copyright.
-]
-
-# This used to provide overrides. I can't find a valid reason to add any more
-# here, though.
-PACKAGE_HOMEPAGES = {
- # Example:
- # 'x11-proto/glproto': ['http://www.x.org/'],
-}
-
-# These are tokens found in LICENSE= in an ebuild that aren't licenses we
-# can actually read from disk.
-# You should not use this to blacklist real licenses.
-LICENCES_IGNORE = [
- ')', # Ignore OR tokens from LICENSE="|| ( LGPL-2.1 MPL-1.1 )"
- '(',
- '||',
-]
-
-# Find the directory of this script.
-SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
-
-# The template files we depend on for generating HTML.
-TMPL = os.path.join(SCRIPT_DIR, 'about_credits.tmpl')
-ENTRY_TMPL = os.path.join(SCRIPT_DIR, 'about_credits_entry.tmpl')
-SHARED_LICENSE_TMPL = os.path.join(
- SCRIPT_DIR, 'about_credits_shared_license_entry.tmpl')
-
-
-# This is called directly by src/repohooks/pre-upload.py
-def GetLicenseTypesFromEbuild(ebuild_path):
- """Returns a list of license types from the ebuild file.
-
- This function does not always return the correct list, but it is
- faster than using portageq for not having to access chroot. It is
- intended to be used for tasks such as presubmission checks.
-
- Args:
- ebuild_path: ebuild to read.
-
- Returns:
- list of licenses read from ebuild.
-
- Raises:
- ValueError: ebuild errors.
- """
- ebuild_env_tmpl = """
-has() { [[ " ${*:2} " == *" $1 "* ]]; }
-inherit() {
- local overlay_list="%(overlay_list)s"
- local eclass overlay f
- for eclass; do
- has ${eclass} ${_INHERITED_} && continue
- _INHERITED_+=" ${eclass}"
- for overlay in %(overlay_list)s; do
- f="${overlay}/eclass/${eclass}.eclass"
- if [[ -e ${f} ]]; then
- source "${f}"
- break
- fi
- done
- done
-}
-source %(ebuild)s"""
-
- # TODO: the overlay_list hard-coded here should be changed to look
- # at the current overlay, and then the master overlays. E.g. for an
- # ebuild file in overlay-parrot, we will look at parrot overlay
- # first, and then look at portage-stable and chromiumos, which are
- # listed as masters in overlay-parrot/metadata/layout.conf.
- tmpl_env = {
- 'ebuild': ebuild_path,
- 'overlay_list': '%s %s' % (
- os.path.join(constants.SOURCE_ROOT,
- 'src/third_party/chromiumos-overlay'),
- os.path.join(constants.SOURCE_ROOT,
- 'src/third_party/portage-stable'))
- }
-
- with tempfile.NamedTemporaryFile(bufsize=0) as f:
- osutils.WriteFile(f.name, ebuild_env_tmpl % tmpl_env)
- env = osutils.SourceEnvironment(
- f.name, whitelist=['LICENSE'], ifs=' ', multiline=True)
-
- if not env.get('LICENSE'):
- raise ValueError('No LICENSE found in the ebuild.')
- if re.search(r'[,;]', env['LICENSE']):
- raise ValueError(
- 'LICENSE field in the ebuild should be whitespace-limited.')
-
- return env['LICENSE'].split()
-
-
-class PackageLicenseError(Exception):
- """Thrown if something fails while getting license information for a package.
-
- This will cause the processing to error in the end.
- """
-
-
-class PackageInfo(object):
- """Package info containers, mostly for storing licenses."""
-
- def __init__(self):
-
- self.board = None
- self.revision = None
-
- # Array of scanned license texts.
- self.license_text_scanned = []
-
- self.category = None
- self.name = None
- self.version = None
-
- # Looks something like this
- # /mnt/host/source/src/
- # third_party/portage-stable/net-misc/rsync/rsync-3.0.8.ebuild
- self.ebuild_path = None
-
- # Array of license names retrieved from ebuild or override in this code.
- self.ebuild_license_names = []
- self.homepages = []
- # This contains licenses names we can read from Gentoo or custom licenses.
- # These are supposed to be shared licenses (i.e. licenses referenced by
- # more then one package), but after all processing, we may find out that
- # some are only used once and they get taken out of the shared pool and
- # pasted directly in the sole package that was using them (see
- # GenerateHTMLLicenseOutput).
- self.license_names = set()
-
- # We set this if the ebuild has a BSD/MIT like license that requires
- # scanning for a LICENSE file in the source code, or a static mapping
- # in PACKAGE_LICENSES. Not finding one once this is set, is fatal.
- self.need_copyright_attribution = False
- # This flag just says we'd like to include licenses from the source, but
- # not finding any is not fatal.
- self.scan_source_for_licenses = False
-
- # After reading basic package information, we can mark the package as
- # one to skip in licensing.
- self.skip = False
-
- # If we failed to get licensing for this package, mark it as such so that
- # it can be flagged when the full license file is being generated.
- self.licensing_failed = False
-
- # If we are called from a hook, we grab package info from the soure tree.
- # This is also used as a flag to know whether we should do package work
- # based on an installed package, or one that is being built and we got
- # called from the hook.
- self.build_source_tree = None
-
- @property
- def fullnamerev(self):
- s = '%s-%s' % (self.fullname, self.version)
- if self.revision:
- s += '-r%s' % self.revision
- return s
-
- @property
- def fullname(self):
- return '%s/%s' % (self.category, self.name)
-
- @property
- def license_dump_path(self):
- """e.g. /build/x86-alex//var/db/pkg/sys-apps/dtc-1.4.0/license.yaml."""
- return "%s/%s/%s/license.yaml" % (cros_build_lib.GetSysroot(self.board),
- PER_PKG_LICENSE_DIR, self.fullnamerev)
-
- def _BuildInfo(self, filename):
- filename = '%s/build-info/%s' % (self.build_source_tree, filename)
- # Buildinfo properties we read are in US-ASCII, not Unicode.
- try:
- bi = open(filename).read().rstrip()
- # Some properties like HOMEPAGE may be absent.
- except IOError:
- bi = ""
- return bi
-
- def _RunEbuildPhases(self, phases):
- """Run a list of ebuild phases on an ebuild.
-
- Args:
- phases: list of phases like ['clean', 'fetch'] or ['unpack'].
-
- Returns:
- ebuild command output
- """
-
- return cros_build_lib.RunCommand(
- ['ebuild-%s' % self.board, self.ebuild_path] + phases, print_cmd=debug,
- redirect_stdout=True)
-
- def _GetOverrideLicense(self):
- """Look in COPYRIGHT_ATTRIBUTION_DIR for license with copyright attribution.
-
- For dev-util/bsdiff-4.3-r5, the code will look for
- dev-util/bsdiff-4.3-r5
- dev-util/bsdiff-4.3
- dev-util/bsdiff
-
- It is ok to have more than one bsdiff license file, and an empty file acts
- as a rubout (i.e. an empty dev-util/bsdiff-4.4 will shadow dev-util/bsdiff
- and tell the licensing code to look in the package source for a license
- instead of using dev-util/bsdiff as an override).
-
- Returns:
- False (no license found) or a multiline license string.
- """
- license_read = None
- # dev-util/bsdiff-4.3-r5 -> bsdiff-4.3-r5
- filename = os.path.basename(self.fullnamerev)
- license_path = os.path.join(COPYRIGHT_ATTRIBUTION_DIR,
- os.path.dirname(self.fullnamerev))
- pv = portage_utilities.SplitPV(filename)
- pv_no_rev = '%s-%s' % (pv.package, pv.version_no_rev)
- for filename in (pv.pv, pv_no_rev, pv.package):
- file_path = os.path.join(license_path, filename)
- logging.debug("Looking for override copyright attribution license in %s",
- file_path)
- if os.path.exists(file_path):
- # Turn
- # /../merlin/trunk/src/third_party/chromiumos-overlay/../dev-util/bsdiff
- # into
- # chromiumos-overlay/../dev-util/bsdiff
- short_dir_path = os.path.join(*file_path.rsplit(os.path.sep, 5)[1:])
- license_read = "Copyright Attribution License %s:\n\n" % short_dir_path
- license_read += ReadUnknownEncodedFile(
- file_path, "read copyright attribution license")
- break
-
- return license_read
-
- def _ExtractLicenses(self):
- """Scrounge for text licenses in the source of package we'll unpack.
-
- This is only called if we couldn't get usable licenses from the ebuild,
- or one of them is BSD/MIT like which forces us to look for a file with
- copyright attribution in the source code itself.
-
- First, we have a shortcut where we scan COPYRIGHT_ATTRIBUTION_DIR to see if
- we find a license for this package. If so, we use that.
- Typically it'll be used if the unpacked source does not have the license
- that we're required to display for copyright attribution (in some cases it's
- plain absent, in other cases, it could be in a filename we don't look for).
-
- Otherwise, we scan the unpacked source code for what looks like license
- files as defined in LICENSE_NAMES_REGEX.
-
- Raises:
- AssertionError: on runtime errors
- PackageLicenseError: couldn't find copyright attribution file.
- """
- license_override = self._GetOverrideLicense()
- if license_override:
- self.license_text_scanned = [license_override]
- return
-
- if self.build_source_tree:
- workdir = "%s/work" % self.build_source_tree
- else:
- self._RunEbuildPhases(['clean', 'fetch'])
- output = self._RunEbuildPhases(['unpack']).output.splitlines()
- # Output is spammy, it looks like this:
- # * gc-7.2d.tar.gz RMD160 SHA1 SHA256 size ;-) ... [ ok ]
- # * checking gc-7.2d.tar.gz ;-) ... [ ok ]
- # * Running stacked hooks for pre_pkg_setup
- # * sysroot_build_bin_dir ...
- # [ ok ]
- # * Running stacked hooks for pre_src_unpack
- # * python_multilib_setup ...
- # [ ok ]
- # >>> Unpacking source...
- # >>> Unpacking gc-7.2d.tar.gz to /build/x86-alex/tmp/po/[...]ps-7.2d/work
- # >>> Source unpacked in /build/x86-alex/tmp/portage/[...]ops-7.2d/work
- # So we only keep the last 2 lines, the others we don't care about.
- output = [line for line in output if line[0:3] == ">>>" and
- line != ">>> Unpacking source..."]
- for line in output:
- logging.info(line)
-
- args = ['portageq-%s' % self.board, 'envvar', 'PORTAGE_TMPDIR']
- result = cros_build_lib.RunCommand(args, print_cmd=debug,
- redirect_stdout=True)
- tmpdir = result.output.splitlines()[0]
- # tmpdir gets something like /build/daisy/tmp/
- workdir = os.path.join(tmpdir, 'portage', self.fullnamerev, 'work')
-
- if not os.path.exists(workdir):
- raise AssertionError("Unpack of %s didn't create %s. Version mismatch" %
- (self.fullnamerev, workdir))
-
- # You may wonder how deep should we go?
- # In case of packages with sub-packages, it could be deep.
- # Let's just be safe and get everything we can find.
- # In the case of libatomic_ops, it's actually required to look deep
- # to find the MIT license:
- # dev-libs/libatomic_ops-7.2d/work/gc-7.2/libatomic_ops/doc/LICENSING.txt
- args = ['find', workdir, '-type', 'f']
- result = cros_build_lib.RunCommand(args, print_cmd=debug,
- redirect_stdout=True).output.splitlines()
- # Truncate results to look like this: swig-2.0.4/COPYRIGHT
- files = [x[len(workdir):].lstrip('/') for x in result]
- license_files = []
- for name in files:
- # When we scan a source tree managed by git, this can contain license
- # files that are not part of the source. Exclude those.
- # (e.g. .git/refs/heads/licensing)
- if ".git/" in name:
- continue
- basename = os.path.basename(name)
- # Looking for license.* brings up things like license.gpl, and we
- # never want a GPL license when looking for copyright attribution,
- # so we skip them here. We also skip regexes that can return
- # license.py (seen in some code).
- if re.search(r".*GPL.*", basename) or re.search(r"\.py$", basename):
- continue
- for regex in LICENSE_NAMES_REGEX:
- if re.search(regex, basename, re.IGNORECASE):
- license_files.append(name)
- break
-
- if not license_files:
- if self.need_copyright_attribution:
- logging.error("""
-%s: unable to find usable license.
-Typically this will happen because the ebuild says it's MIT or BSD, but there
-was no license file that this script could find to include along with a
-copyright attribution (required for BSD/MIT).
-
-If this is Google source, please change
-LICENSE="BSD"
-to
-LICENSE="BSD-Google"
-
-If not, go investigate the unpacked source in %s,
-and find which license to assign. Once you found it, you should copy that
-license to a file under %s
-(or you can modify LICENSE_NAMES_REGEX to pickup a license file that isn't
-being scraped currently).""",
- self.fullnamerev, workdir, COPYRIGHT_ATTRIBUTION_DIR)
- raise PackageLicenseError()
- else:
- # We can get called for a license like as-is where it's preferable
- # to find a better one in the source, but not fatal if we didn't.
- logging.info("Was not able to find a better license for %s "
- "in %s to replace the more generic one from ebuild",
- self.fullnamerev, workdir)
-
- # Examples of multiple license matches:
- # dev-lang/swig-2.0.4-r1: swig-2.0.4/COPYRIGHT swig-2.0.4/LICENSE
- # dev-libs/glib-2.32.4-r1: glib-2.32.4/COPYING pkg-config-0.26/COPYING
- # dev-libs/libnl-3.2.14: libnl-doc-3.2.14/COPYING libnl-3.2.14/COPYING
- # dev-libs/libpcre-8.30-r2: pcre-8.30/LICENCE pcre-8.30/COPYING
- # dev-libs/libusb-0.1.12-r6: libusb-0.1.12/COPYING libusb-0.1.12/LICENSE
- # dev-libs/pyzy-0.1.0-r1: db/COPYING pyzy-0.1.0/COPYING
- # net-misc/strongswan-5.0.2-r4: strongswan-5.0.2/COPYING
- # strongswan-5.0.2/LICENSE
- # sys-process/procps-3.2.8_p11: debian/copyright procps-3.2.8/COPYING
- logging.info('License(s) for %s: %s', self.fullnamerev,
- ' '.join(license_files))
- for license_file in sorted(license_files):
- # Joy and pink ponies. Some license_files are encoded as latin1 while
- # others are utf-8 and of course you can't know but only guess.
- license_path = os.path.join(workdir, license_file)
- license_txt = ReadUnknownEncodedFile(license_path, "Adding License")
-
- self.license_text_scanned += [
- "Scanned Source License %s:\n\n%s" % (license_file, license_txt)]
-
- # We used to clean up here, but there have been many instances where
- # looking at unpacked source to see where the licenses were, was useful
- # so let's disable this for now
- # self._RunEbuildPhases(['clean'])
-
- def GetPackageInfo(self, fullnamewithrev):
- """Populate PackageInfo with package license, and homepage.
-
- self.ebuild_license_names will not be filled if the package is skipped
- or if there was an issue getting data from the ebuild.
- self.license_names will only get the licenses that we can paste
- as shared licenses.
- scan_source_for_licenses will be set if we should unpack the source to look
- for licenses
- if need_copyright_attribution is also set, not finding a license in the
- source is fatal (PackageLicenseError will get raised).
-
- Args:
- fullnamewithrev: e.g. dev-libs/libatomic_ops-7.2d
-
- Raises:
- AssertionError: on runtime errors
- """
- if not fullnamewithrev:
- if not self.build_source_tree:
- raise AssertionError("Cannot continue without full name or source tree")
- fullnamewithrev = "%s/%s" % (self._BuildInfo("CATEGORY"),
- self._BuildInfo("PF"))
- logging.debug("Computed package name %s from %s", fullnamewithrev,
- self.build_source_tree)
-
- try:
- cpv = portage_utilities.SplitCPV(fullnamewithrev)
- # A bad package can either raise a TypeError exception or return None,
- # so we catch both cases.
- if not cpv:
- raise TypeError
- except TypeError:
- raise AssertionError("portage couldn't find %s, missing version number?" %
- fullnamewithrev)
-
- self.category, self.name, self.version, self.revision = (
- cpv.category, cpv.package, cpv.version_no_rev, cpv.rev)
-
- if self.revision is not None:
- self.revision = str(self.revision).lstrip('r')
- if self.revision == '0':
- self.revision = None
-
- if self.category in SKIPPED_CATEGORIES:
- logging.info("%s in SKIPPED_CATEGORIES, skip package", self.fullname)
- self.skip = True
- return
-
- if self.fullname in SKIPPED_PACKAGES:
- logging.info("%s in SKIPPED_PACKAGES, skip package", self.fullname)
- self.skip = True
- return
-
- def _FindEbuildPath(self):
- """Populate package info from an ebuild retrieved via equery."""
- # By default, equery returns the latest version of the package. A
- # build may have used an older version than what is currently
- # available in the source tree (a build dependency can be pinned
- # to an older version of a package for compatibility
- # reasons). Therefore we need to tell equery that we want the
- # exact version number used in the image build as opposed to the
- # latest available in the source tree.
- args = ['equery-%s' % self.board, '-q', '-C', 'which', self.fullnamerev]
- try:
- path = cros_build_lib.RunCommand(args, print_cmd=True,
- redirect_stdout=True).output.strip()
- if not path:
- raise AssertionError
- except:
- raise AssertionError('GetEbuildPath for %s failed.\n'
- 'Is your tree clean? Delete %s and rebuild' %
- (self.name,
- cros_build_lib.GetSysroot(board=self.board)))
- logging.debug("%s -> %s", " ".join(args), path)
-
- if not os.access(path, os.F_OK):
- raise AssertionError("Can't access %s", path)
-
- self.ebuild_path = path
-
- def _ReadEbuildMetadata(self):
- """Read package metadata retrieved via portageq."""
- args = ['portageq-%s' % self.board, 'metadata',
- cros_build_lib.GetSysroot(board=self.board), 'ebuild',
- self.fullnamerev, 'HOMEPAGE', 'LICENSE']
- tmp = cros_build_lib.RunCommand(args, print_cmd=debug,
- redirect_stdout=True)
- lines = tmp.output.splitlines()
- # Runs:
- # portageq metadata /build/x86-alex ebuild net-misc/wget-1.12-r2 \
- # HOMEPAGE LICENSE
- # Returns:
- # http://www.gnu.org/software/wget/
- # GPL-3
- self.homepages, self.ebuild_license_names = (
- lines[0].split(), lines[1].split())
-
- def _TestEbuildContents(self):
- """Discover if the ebuild installed any files.
-
- Returns:
- bool which tells if any files were installed.
- """
- # Search for anything the ebuild might install, other than a directory.
- args = ['equery-%s' % self.board, '-q', '-C', 'files', self.fullnamerev,
- '-f', 'obj']
- tmp = cros_build_lib.RunCommand(args, print_cmd=debug, redirect_stdout=True)
- lines = tmp.output.splitlines()
-
- # lines is an array of the file names installed by the ebuild.
- return bool(lines)
-
- def GetLicenses(self):
- """Get licenses from the ebuild field and the unpacked source code.
-
- Some packages have static license mappings applied to them that get
- retrieved from the ebuild.
-
- For others, we figure out whether the package source should be scanned to
- add licenses found there.
-
- Raises:
- AssertionError: on runtime errors
- PackageLicenseError: couldn't find license in ebuild and source.
- """
- if self.build_source_tree:
- # If the total size installed is zero, we installed no content to license.
- if self._BuildInfo("SIZE").strip() == '0':
- self.skip = True
- return
- self.homepages = self._BuildInfo("HOMEPAGE").split()
- self.ebuild_license_names = self._BuildInfo("LICENSE").split()
- else:
- self._FindEbuildPath()
- self._ReadEbuildMetadata()
- self.skip = self.skip or not self._TestEbuildContents()
-
- # If this ebuild only uses skipped licenses, skip it.
- if (self.ebuild_license_names and
- all(l in SKIPPED_LICENSES for l in self.ebuild_license_names)):
- self.skip = True
-
- if self.skip:
- return
-
- if self.fullname in PACKAGE_HOMEPAGES:
- self.homepages = PACKAGE_HOMEPAGES[self.fullname]
-
- # Packages with missing licenses or licenses that need mapping (like
- # BSD/MIT) are hardcoded here:
- if self.fullname in PACKAGE_LICENSES:
- self.ebuild_license_names = PACKAGE_LICENSES[self.fullname]
- logging.info("Static license mapping for %s: %s", self.fullnamerev,
- ",".join(self.ebuild_license_names))
- else:
- logging.info("Read licenses for %s: %s", self.fullnamerev,
- ",".join(self.ebuild_license_names))
- # Lots of packages in chromeos-base have their license set to BSD instead
- # of BSD-Google:
- new_license_names = []
- for license_name in self.ebuild_license_names:
- # TODO: temp workaround for http;//crbug.com/348750 , remove when the bug
- # is fixed.
- if (license_name == "BSD" and
- self.fullnamerev.startswith("chromeos-base/")):
- license_name = "BSD-Google"
- logging.error(
- "Fixed BSD->BSD-Google for %s because it's in chromeos-base. "
- "Please fix the LICENSE field in the ebuild", self.fullnamerev)
- # TODO: temp workaround for http;//crbug.com/348749 , remove when the bug
- # is fixed.
- if license_name == "Proprietary":
- license_name = "Google-TOS"
- logging.error(
- "Fixed Proprietary -> Google-TOS for %s. "
- "Please fix the LICENSE field in the ebuild", self.fullnamerev)
- new_license_names.append(license_name)
- self.ebuild_license_names = new_license_names
+from chromite.licensing import licenses_lib
- # The ebuild license field can look like:
- # LICENSE="GPL-3 LGPL-3 Apache-2.0" (this means AND, as in all 3)
- # for third_party/portage-stable/app-admin/rsyslog/rsyslog-5.8.11.ebuild
- # LICENSE="|| ( LGPL-2.1 MPL-1.1 )"
- # for third_party/portage-stable/x11-libs/cairo/cairo-1.8.8.ebuild
-
- # The parser isn't very smart and only has basic support for the
- # || ( X Y ) OR logic to do the following:
- # In order to save time needlessly unpacking packages and looking or a
- # cleartext license (which is really a crapshoot), if we have a license
- # like BSD that requires looking for copyright attribution, but we can
- # chose another license like GPL, we do that.
-
- if not self.skip and not self.ebuild_license_names:
- logging.error("%s: no license found in ebuild. FIXME!", self.fullnamerev)
- # In a bind, you could comment this out. I'm making the output fail to
- # get your attention since this error really should be fixed, but if you
- # comment out the next line, the script will try to find a license inside
- # the source.
- raise PackageLicenseError()
-
- # This is not invalid, but the parser can't deal with it, so if it ever
- # happens, error out to tell the programmer to do something.
- # dev-python/pycairo-1.10.0-r4: LGPL-3 || ( LGPL-2.1 MPL-1.1 )
- if "||" in self.ebuild_license_names[1:]:
- logging.error("%s: Can't parse || in the middle of a license: %s",
- self.fullnamerev, ' '.join(self.ebuild_license_names))
- raise PackageLicenseError()
-
- or_licenses_and_one_is_no_attribution = False
- # We do a quick early pass first so that the longer pass below can
- # run accordingly.
- for license_name in [x for x in self.ebuild_license_names
- if x not in LICENCES_IGNORE]:
- # Here we have an OR case, and one license that we can use stock, so
- # we remember that in order to be able to skip license attributions if
- # any were in the OR.
- if (self.ebuild_license_names[0] == "||" and
- license_name not in COPYRIGHT_ATTRIBUTION_LICENSES):
- or_licenses_and_one_is_no_attribution = True
-
- for license_name in [x for x in self.ebuild_license_names
- if x not in LICENCES_IGNORE]:
- # Licenses like BSD or MIT can't be used as is because they do not contain
- # copyright self. They have to be replaced by copyright file given in the
- # source code, or manually mapped by us in PACKAGE_LICENSES
- if license_name in COPYRIGHT_ATTRIBUTION_LICENSES:
- # To limit needless efforts, if a package is BSD or GPL, we ignore BSD
- # and use GPL to avoid scanning the package, but we can only do this if
- # or_licenses_and_one_is_no_attribution has been set above.
- # This ensures that if we have License: || (BSD3 BSD4), we will
- # look in the source.
- if or_licenses_and_one_is_no_attribution:
- logging.info("%s: ignore license %s because ebuild LICENSES had %s",
- self.fullnamerev, license_name,
- ' '.join(self.ebuild_license_names))
- else:
- logging.info("%s: can't use %s, will scan source code for copyright",
- self.fullnamerev, license_name)
- self.need_copyright_attribution = True
- self.scan_source_for_licenses = True
- else:
- self.license_names.add(license_name)
- # We can't display just 2+ because it only contains text that says to
- # read v2 or v3.
- if license_name == 'GPL-2+':
- self.license_names.add('GPL-2')
- if license_name == 'LGPL-2+':
- self.license_names.add('LGPL-2')
-
- if license_name in LOOK_IN_SOURCE_LICENSES:
- logging.info("%s: Got %s, will try to find better license in source...",
- self.fullnamerev, license_name)
- self.scan_source_for_licenses = True
-
- if self.license_names:
- logging.info('%s: using stock|cust license(s) %s',
- self.fullnamerev, ','.join(self.license_names))
-
- # If the license(s) could not be found, or one requires copyright
- # attribution, dig in the source code for license files:
- # For instance:
- # Read licenses from ebuild for net-dialup/ppp-2.4.5-r3: BSD,GPL-2
- # We need get the substitution file for BSD and add it to GPL.
- if self.scan_source_for_licenses:
- self._ExtractLicenses()
-
- # This shouldn't run, but leaving as sanity check.
- if not self.license_names and not self.license_text_scanned:
- raise AssertionError("Didn't find usable licenses for %s" %
- self.fullnamerev)
-
-
-class Licensing(object):
- """Do the actual work of extracting licensing info and outputting html."""
-
- def __init__(self, board, package_fullnames, gen_licenses):
- # eg x86-alex
- self.board = board
- # List of stock and custom licenses referenced in ebuilds. Used to
- # print a report. Dict value says which packages use that license.
- self.licenses = {}
-
- # Licenses are supposed to be generated at package build time and be
- # ready for us, but in case they're not, they can be generated.
- self.gen_licenses = gen_licenses
-
- # This keeps track of whether we have an incomplete license file due to
- # package errors during parsing.
- # Any non empty list at the end shows the list of packages that caused
- # errors.
- self.incomplete_packages = []
-
- self.package_text = {}
- self.entry_template = None
-
- # We need to have a dict for the list of packages objects, index by package
- # fullnamerev, so that when we scan our licenses at the end, and find out
- # some shared licenses are only used by one package, we can access that
- # package object by name, and add the license directly in that object.
- self.packages = {}
- self._package_fullnames = package_fullnames
-
- @property
- def sorted_licenses(self):
- return sorted(self.licenses.keys(), key=str.lower)
-
- def _SaveLicenseDump(self, pkg):
- if pkg.build_source_tree:
- save_file = "%s/build-info/license.yaml" % pkg.build_source_tree
- else:
- save_file = pkg.license_dump_path
- logging.debug("Saving license to %s", save_file)
- save_dir = os.path.dirname(save_file)
- if not os.path.isdir(save_dir):
- os.makedirs(save_dir, 0755)
- with open(save_file, "w") as f:
- yaml_dump = []
- for key, value in pkg.__dict__.items():
- yaml_dump.append([key, value])
- f.write(yaml.dump(yaml_dump))
-
- def _LoadLicenseDump(self, pkg):
- save_file = pkg.license_dump_path
- logging.debug("Getting license from %s for %s", save_file, pkg.name)
- with open(save_file, "r") as f:
- # yaml.safe_load barfs on unicode it output, but we don't really need it.
- yaml_dump = yaml.load(f)
- for key, value in yaml_dump:
- pkg.__dict__[key] = value
-
- def LicensedPackages(self, license_name):
- """Return list of packages using a given license."""
- return self.licenses[license_name]
-
- def LoadPackageInfo(self, board):
- """Populate basic package info for all packages from their ebuild."""
- for package_name in self._package_fullnames:
- pkg = PackageInfo()
- pkg.board = board
- pkg.GetPackageInfo(package_name)
- self.packages[package_name] = pkg
-
- def HookPackageProcess(self, pkg_build_path):
- """Different entry point to populate a packageinfo.
-
- This is called instead of LoadPackageInfo when called by a package build.
-
- Args:
- pkg_build_path: unpacked being built by emerge.
- """
- pkg = PackageInfo()
- pkg.build_source_tree = pkg_build_path
- pkg.GetPackageInfo(None)
- if not pkg.skip:
- pkg.GetLicenses()
- self._SaveLicenseDump(pkg)
-
- def ProcessPackageLicenses(self):
- """Iterate through all packages provided and gather their licenses.
-
- GetLicenses will scrape licenses from the code and/or gather stock license
- names. We gather the list of stock and custom ones for later processing.
-
- Do not call this after adding virtual packages with AddExtraPkg.
- """
- for package_name in self.packages:
- pkg = self.packages[package_name]
- if pkg.skip:
- if self.gen_licenses:
- logging.info("Package %s is in skip list", package_name)
- else:
- # If we do a licensing run expecting to get licensing objects from
- # an image build, virtual packages will be missing such objects
- # because virtual packages do not get the install hook run at build
- # time. Because this script may not have permissions to write in the
- # /var/db/ directory, we don't want it to generate useless license
- # bits for virtual packages. As a result, ignore virtual packages
- # here.
- if pkg.category == "virtual":
- logging.debug("Ignoring %s virtual package", package_name)
- continue
-
- # Other skipped packages get dumped with incomplete info and the skip flag
- if not os.path.exists(pkg.license_dump_path) and not self.gen_licenses:
- logging.warning(">>> License for %s is missing, creating now <<<",
- package_name)
- if not os.path.exists(pkg.license_dump_path) or self.gen_licenses:
- if not pkg.skip:
- try:
- pkg.GetLicenses()
- except PackageLicenseError:
- pkg.licensing_failed = True
- # We dump packages where licensing failed too.
- self._SaveLicenseDump(pkg)
-
- # To debug the code, we force the data to be re-read from the dumps
- # instead of reusing what we may have in memory.
- for package_name in self.packages:
- pkg = self.packages[package_name]
- if pkg.category == "virtual":
- continue
-
- self._LoadLicenseDump(pkg)
- logging.debug("loaded dump for %s", pkg.fullnamerev)
- if pkg.skip:
- logging.info("Package %s is in skip list", pkg.fullnamerev)
- if pkg.licensing_failed:
- logging.info("Package %s failed licensing", pkg.fullnamerev)
- self.incomplete_packages += [pkg.fullnamerev]
-
- def AddExtraPkg(self, pkg_data):
- """Allow adding pre-created virtual packages.
-
- GetLicenses will not work on them, so add them after having run
- ProcessPackages.
-
- Args:
- pkg_data: array of package data as defined below
- """
- pkg = PackageInfo()
- pkg.board = self.board
- pkg.category = pkg_data[0]
- pkg.name = pkg_data[1]
- pkg.version = pkg_data[2]
- pkg.homepages = pkg_data[3] # this is a list
- pkg.license_names = pkg_data[4] # this is also a list
- pkg.ebuild_license_names = pkg_data[4]
- self.packages[pkg.fullnamerev] = pkg
-
- # Called directly by src/repohooks/pre-upload.py
- @staticmethod
- def FindLicenseType(license_name):
- """Says if a license is stock Gentoo, custom, or doesn't exist."""
-
- for directory in STOCK_LICENSE_DIRS:
- path = '%s/%s' % (directory, license_name)
- if os.path.exists(path):
- return "Gentoo Package Stock"
-
- for directory in CUSTOM_LICENSE_DIRS:
- path = '%s/%s' % (directory, license_name)
- if os.path.exists(path):
- return "Custom"
-
- if license_name in SKIPPED_LICENSES:
- return "Custom"
-
- raise AssertionError("""
-license %s could not be found in %s
-If the license in the ebuild is correct,
-a) a stock license should be added to portage-stable/licenses :
-running `cros_portage_upgrade` inside of the chroot should clone this repo
-to /tmp/portage/:
-https://chromium.googlesource.com/chromiumos/overlays/portage/+/gentoo
-find the new licenses under licenses, and add them to portage-stable/licenses
-
-b) if it's a non gentoo package with a custom license, you can copy that license
-to third_party/chromiumos-overlay/licenses/
-
-Try re-running the script with -p cat/package-ver --generate
-after fixing the license.""" %
- (license_name,
- '\n'.join(STOCK_LICENSE_DIRS + CUSTOM_LICENSE_DIRS))
- )
-
- @staticmethod
- def ReadSharedLicense(license_name):
- """Read and return stock or cust license file specified in an ebuild."""
-
- license_path = None
- for directory in STOCK_LICENSE_DIRS + CUSTOM_LICENSE_DIRS:
- path = os.path.join(directory, license_name)
- if os.path.exists(path):
- license_path = path
- break
-
- if license_path:
- return ReadUnknownEncodedFile(license_path, "read license")
- else:
- raise AssertionError("license %s could not be found in %s"
- % (license_name,
- '\n'.join(STOCK_LICENSE_DIRS +
- CUSTOM_LICENSE_DIRS))
- )
-
- @staticmethod
- def EvaluateTemplate(template, env):
- """Expand a template with vars like {{foo}} using a dict of expansions."""
- # TODO switch to stock python templates.
- for key, val in env.iteritems():
- template = template.replace('{{%s}}' % key, val)
- return template
-
- def _GeneratePackageLicenseText(self, pkg):
- """Concatenate all licenses related to a pkg.
-
- This means a combination of ebuild shared licenses and licenses read from
- the pkg source tree, if any.
-
- Args:
- pkg: PackageInfo object
-
- Raises:
- AssertionError: on runtime errors
- """
- license_text = []
- for license_text_scanned in pkg.license_text_scanned:
- license_text.append(license_text_scanned)
- license_text.append('%s\n' % ('-=' * 40))
-
- license_pointers = []
- # sln: shared license name.
- for sln in pkg.license_names:
- # Says whether it's a stock gentoo or custom license.
- license_type = self.FindLicenseType(sln)
- license_pointers.append(
- "<li><a href='#%s'>%s License %s</a></li>" % (
- sln, license_type, sln))
-
- # This should get caught earlier, but one extra check.
- if not license_text + license_pointers:
- raise AssertionError('Ended up with no license_text for %s', pkg.name)
-
- env = {
- 'name': "%s-%s" % (pkg.name, pkg.version),
- 'url': cgi.escape(pkg.homepages[0]) if pkg.homepages else '',
- 'licenses_txt': cgi.escape('\n'.join(license_text)) or '',
- 'licenses_ptr': '\n'.join(license_pointers) or '',
- }
- self.package_text[pkg] = self.EvaluateTemplate(self.entry_template, env)
-
- def GenerateHTMLLicenseOutput(self, output_file,
- output_template=TMPL,
- entry_template=ENTRY_TMPL,
- license_template=SHARED_LICENSE_TMPL):
- """Generate the combined html license file used in ChromeOS.
-
- Args:
- output_file: resulting HTML license output.
- output_template: template for the entire HTML file.
- entry_template: template for per package entries.
- license_template: template for shared license entries.
- """
- self.entry_template = ReadUnknownEncodedFile(entry_template)
- sorted_license_txt = []
-
- # Keep track of which licenses are used by which packages.
- for pkg in self.packages.values():
- if pkg.skip or pkg.licensing_failed:
- continue
- for sln in pkg.license_names:
- self.licenses.setdefault(sln, []).append(pkg.fullnamerev)
-
- # Find licenses only used once, and roll them in the package that uses them.
- # We use keys() because licenses is modified in the loop, so we can't use
- # an iterator.
- for sln in self.licenses.keys():
- if len(self.licenses[sln]) == 1:
- pkg_fullnamerev = self.licenses[sln][0]
- logging.info("Collapsing shared license %s into single use license "
- "(only used by %s)", sln, pkg_fullnamerev)
- license_type = self.FindLicenseType(sln)
- license_txt = self.ReadSharedLicense(sln)
- single_license = "%s License %s:\n\n%s" % (license_type, sln,
- license_txt)
- pkg = self.packages[pkg_fullnamerev]
- pkg.license_text_scanned.append(single_license)
- pkg.license_names.remove(sln)
- del self.licenses[sln]
-
- for pkg in sorted(self.packages.values(),
- key=lambda x: (x.name.lower(), x.version, x.revision)):
- if pkg.skip:
- logging.debug("Skipping package %s", pkg.fullnamerev)
- continue
- if pkg.licensing_failed:
- logging.debug("Package %s failed licensing, skipping", pkg.fullnamerev)
- continue
- self._GeneratePackageLicenseText(pkg)
- sorted_license_txt += [self.package_text[pkg]]
-
- # Now generate the bottom of the page that will contain all the shared
- # licenses and a list of who is pointing to them.
- license_template = ReadUnknownEncodedFile(license_template)
-
- licenses_txt = []
- for license_name in self.sorted_licenses:
- env = {
- 'license_name': license_name,
- 'license': cgi.escape(self.ReadSharedLicense(license_name)),
- 'license_type': self.FindLicenseType(license_name),
- 'license_packages': ' '.join(self.LicensedPackages(license_name)),
- }
- licenses_txt += [self.EvaluateTemplate(license_template, env)]
-
- file_template = ReadUnknownEncodedFile(output_template)
- env = {
- 'entries': '\n'.join(sorted_license_txt),
- 'licenses': '\n'.join(licenses_txt),
- }
- osutils.WriteFile(output_file,
- self.EvaluateTemplate(file_template, env).encode('UTF-8'))
-
-
-def ListInstalledPackages(board, all_packages=False):
- """Return a list of all packages installed for a particular board."""
-
- # If all_packages is set to True, all packages visible in the build
- # chroot are used to generate the licensing file. This is not what you want
- # for a release license file, but it's a way to run licensing checks against
- # all packages.
- # If it's set to False, it will only generate a licensing file that contains
- # packages used for a release build (as determined by the dependencies for
- # virtual/target-os).
-
- if all_packages:
- # The following returns all packages that were part of the build tree
- # (many get built or used during the build, but do not get shipped).
- # Note that it also contains packages that are in the build as
- # defined by build_packages but not part of the image we ship.
- args = ["equery-%s" % board, "list", "*"]
- packages = cros_build_lib.RunCommand(args, print_cmd=debug,
- redirect_stdout=True
- ).output.splitlines()
- else:
- # The following returns all packages that were part of the build tree
- # (many get built or used during the build, but do not get shipped).
- # Note that it also contains packages that are in the build as
- # defined by build_packages but not part of the image we ship.
- args = ["emerge-%s" % board, "--with-bdeps=y", "--usepkgonly",
- "--emptytree", "--pretend", "--color=n", "virtual/target-os"]
- emerge = cros_build_lib.RunCommand(args, print_cmd=debug,
- redirect_stdout=True).output.splitlines()
- # Another option which we've decided not to use, is bdeps=n. This outputs
- # just the packages we ship, but does not packages that were used to build
- # them, including a package like flex which generates a .a that is included
- # and shipped in ChromeOS.
- # We've decided to credit build packages, even if we're not legally required
- # to (it's always nice to do), and that way we get corner case packages like
- # flex. This is why we use bdep=y and not bdep=n.
-
- packages = []
- # [binary R ] x11-libs/libva-1.1.1 to /build/x86-alex/
- pkg_rgx = re.compile(r'\[[^]]+R[^]]+\] (.+) to /build/.*')
- # If we match something else without the 'R' like
- # [binary U ] chromeos-base/pepper-flash-13.0.0.133-r1 [12.0.0.77-r1]
- # this is bad and we should die on this.
- pkg_rgx2 = re.compile(r'(\[[^]]+\] .+) to /build/.*')
- for line in emerge:
- match = pkg_rgx.search(line)
- match2 = pkg_rgx2.search(line)
- if match:
- packages.append(match.group(1))
- elif match2:
- raise AssertionError("Package incorrectly installed, try eclean-%s" %
- board, "\n%s" % match2.group(1))
-
- return packages
-
-
-def _HandleIllegalXMLChars(text):
- """Handles illegal XML Characters.
-
- XML 1.0 acceptable character range:
- Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | \
- [#x10000-#x10FFFF]
-
- This function finds all illegal characters in the text and filters
- out all whitelisted characters (e.g. ^L).
-
- Args:
- text: text to examine.
-
- Returns:
- Filtered |text| and a list of non-whitelisted illegal characters found.
- """
- whitelist_re = re.compile(u'[\x0c]')
- text = whitelist_re.sub('', text)
- # illegal_chars_re includes all illegal characters (whitelisted or
- # not), so we can expand the whitelist without modifying this line.
- illegal_chars_re = re.compile(
- u'[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]')
- return (text, illegal_chars_re.findall(text))
-
-
-def ReadUnknownEncodedFile(file_path, logging_text=None):
- """Read a file of unknown encoding (UTF-8 or latin) by trying in sequence.
-
- Args:
- file_path: what to read.
- logging_text: what to display for logging depending on file read.
-
- Returns:
- File content, possibly converted from latin1 to UTF-8.
-
- Raises:
- Assertion error: if non-whitelisted illegal XML characters
- are found in the file.
- ValueError: returned if we get invalid XML.
- """
- try:
- with codecs.open(file_path, encoding="utf-8") as c:
- file_txt = c.read()
- if logging_text:
- logging.info("%s %s (UTF-8)", logging_text, file_path)
- except UnicodeDecodeError:
- with codecs.open(file_path, encoding="latin1") as c:
- file_txt = c.read()
- if logging_text:
- logging.info("%s %s (latin1)", logging_text, file_path)
-
- file_txt, char_list = _HandleIllegalXMLChars(file_txt)
-
- if char_list:
- raise ValueError('Illegal XML characters %s found in %s.' %
- (char_list, file_path))
-
- return file_txt
-
-
-def NonHookMain(opts):
+# These packages exist as workarounds....
+#
+# X is listed to avoid installing licensing info for all split X packages.
+# sys-boot packages are listed as a partial work around for not having per-board
+# credits files (TODO(dgarrett): Remove when crbug.com/197970 fixed).
+EXTRA_PACKAGES = (
+ ('x11-base', 'X.Org', '1.9.3', ['http://www.x.org/'], ['X']),
+ ('sys-kernel', 'Linux', '2.6', ['http://www.kernel.org/'], ['GPL-2']),
+ ('sys-boot', 'u-boot', '2013.06', ['http://www.denx.de/wiki/U-Boot'],
+ ['GPL-2']),
+ ('sys-boot', 'coreboot', '2013.04', ['http://www.coreboot.org/'],
+ ['GPL-2']),
+)
+
+
+def LoadPackageInfo(board, all_packages, generateMissing, packages):
"""Do the work when we're not called as a hook."""
-
- board, all_packages, gen_licenses, output_file = (
- opts.board, opts.all_packages, opts.gen_licenses, opts.output)
- packages_mode = bool(opts.package)
-
- if not board:
- raise AssertionError("No board given (--board)")
logging.info("Using board %s.", board)
builddir = os.path.join(cros_build_lib.GetSysroot(board=board),
'tmp', 'portage')
+
if not os.path.exists(builddir):
raise AssertionError(
"FATAL: %s missing.\n"
"Did you give the right board and build that tree?" % builddir)
- if not output_file and not gen_licenses:
- logging.warning("You are not generating licenses and you didn't ask for "
- "output. As a result this script will do nothing useful.")
- license_dir = "%s/%s/" % (cros_build_lib.GetSysroot(board),
- PER_PKG_LICENSE_DIR)
- if not os.path.exists(license_dir):
- raise AssertionError("FATAL: %s missing.\n" % license_dir)
- if gen_licenses and os.geteuid() != 0:
- raise AssertionError("Run with sudo if you use --generate-licenses.")
+ detect_packages = not packages
+ if detect_packages:
+ # If no packages were specified, we look up the full list.
+ packages = licenses_lib.ListInstalledPackages(board, all_packages)
- if packages_mode:
- packages = opts.package
- else:
- packages = ListInstalledPackages(board, all_packages)
if not packages:
raise AssertionError('FATAL: Could not get any packages for board %s' %
board)
+
logging.debug("Initial Package list to work through:\n%s",
'\n'.join(sorted(packages)))
- licensing = Licensing(board, packages, gen_licenses)
+ licensing = licenses_lib.Licensing(board, packages, generateMissing)
+
licensing.LoadPackageInfo(board)
logging.debug("Package list to skip:\n%s",
'\n'.join([p for p in sorted(packages)
@@ -1404,66 +177,56 @@ def NonHookMain(opts):
'\n'.join([p for p in sorted(packages)
if not licensing.packages[p].skip]))
licensing.ProcessPackageLicenses()
- if not packages_mode:
- # We add 2 virtual packages as well as 2 boot packages that are included
- # with some hardware, but not in the image or package list.
- for extra_pkg in [
- ['x11-base', 'X.Org', '1.9.3', ['http://www.x.org/'], ['X']],
- ['sys-kernel', 'Linux', '2.6', ['http://www.kernel.org/'], ['GPL-2']],
- ['sys-boot', 'u-boot', '2013.06', ['http://www.denx.de/wiki/U-Boot'],
- ['GPL-2+']],
- ['sys-boot', 'coreboot', '2013.04', ['http://www.coreboot.org/'],
- ['GPL-2']],
- ]:
+ if detect_packages:
+ # If we detected 'all' packages, we have to add in these extras.
+ for extra_pkg in EXTRA_PACKAGES:
licensing.AddExtraPkg(extra_pkg)
- if output_file:
- licensing.GenerateHTMLLicenseOutput(output_file)
-
if licensing.incomplete_packages:
raise AssertionError("""
-DO NOT USE OUTPUT!!!
Some packages are missing due to errors, please look at errors generated
during this run.
List of packages with errors:
%s
""" % '\n'.join(licensing.incomplete_packages))
+ return licensing
-def main(args):
- # pylint: disable=W0603
- global debug
- # pylint: enable=W0603
+def main(args):
parser = commandline.ArgumentParser(usage=__doc__)
parser.add_argument("-b", "--board",
help="which board to run for, like x86-alex")
parser.add_argument("-p", "--package", action="append", default=[],
+ dest="packages",
help="check the license of the package, e.g.,"
"dev-libs/libatomic_ops-7.2d")
parser.add_argument("-a", "--all-packages", action="store_true",
dest="all_packages",
help="Run licensing against all packages in the "
- "build tree")
+ "build tree, instead of just virtual/target-os "
+ "dependencies.")
parser.add_argument("-g", "--generate-licenses", action="store_true",
dest="gen_licenses",
help="Generate licensing bits for each package before "
"making license file\n(default is to use build time "
"license bits)")
- parser.add_argument("-k", "--hook", type="path", dest="hook",
- help="Hook mode takes a single package and outputs its "
- "license on stdout. Give $PORTAGE_BUILDDIR as argument.")
parser.add_argument("-o", "--output", type="path",
help="which html file to create with output")
opts = parser.parse_args(args)
- debug = opts.debug
- debug = True
- hook_path = opts.hook
- # This get called from src/scripts/hooks/install/gen-package-licenses.sh
- if hook_path:
- licensing = Licensing(None, None, True)
- licensing.HookPackageProcess(hook_path)
- else:
- NonHookMain(opts)
+ if not opts.board:
+ raise AssertionError("No board given (--board)")
+
+ if not opts.output and not opts.gen_licenses:
+ raise AssertionError("You must specify --output and/or --generate-licenses")
+
+ if opts.gen_licenses and os.geteuid() != 0:
+ raise AssertionError("Run with sudo if you use --generate-licenses.")
+
+ licensing = LoadPackageInfo(
+ opts.board, opts.all_packages, opts.gen_licenses, opts.packages)
+
+ if opts.output:
+ licensing.GenerateHTMLLicenseOutput(opts.output)
diff --git a/licensing/licenses_lib.py b/licensing/licenses_lib.py
new file mode 100644
index 000000000..c116814ef
--- /dev/null
+++ b/licensing/licenses_lib.py
@@ -0,0 +1,1241 @@
+#!/usr/bin/python
+# Copyright 2012 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Library for validating ebuild license information, and generating credits.
+
+Documentation on this script is also available here:
+ http://www.chromium.org/chromium-os/licensing
+"""
+
+import cgi
+import codecs
+import logging
+import os
+import re
+import tempfile
+
+from chromite.cbuildbot import constants
+from chromite.cbuildbot import portage_utilities
+from chromite.lib import cros_build_lib
+from chromite.lib import osutils
+
+# We are imported by src/repohooks/pre-upload.py in a non chroot environment
+# where yaml may not be there, so we don't error on that since it's not needed
+# in that case.
+try:
+ import yaml
+except ImportError:
+ yaml = None
+
+debug = True
+
+# See http://crbug.com/207004 for discussion.
+PER_PKG_LICENSE_DIR = '/var/db/pkg'
+
+STOCK_LICENSE_DIRS = [
+ os.path.join(constants.SOURCE_ROOT,
+ 'src/third_party/portage-stable/licenses'),
+]
+
+# These are licenses for custom software we got that isn't part of
+# upstream gentoo.
+CUSTOM_LICENSE_DIRS = [
+ os.path.join(constants.SOURCE_ROOT,
+ 'src/third_party/chromiumos-overlay/licenses'),
+]
+
+COPYRIGHT_ATTRIBUTION_DIR = (
+ os.path.join(
+ constants.SOURCE_ROOT,
+ 'src/third_party/chromiumos-overlay/licenses/copyright-attribution'))
+
+# Virtual packages don't need to have a license and often don't, so we skip
+# them. chromeos-base contains google platform packages that are covered by
+# the general license at top of tree, so we skip those too.
+SKIPPED_CATEGORIES = [
+ 'virtual',
+]
+
+SKIPPED_PACKAGES = [
+ # Fix these packages by adding a real license in the code.
+ # You should not skip packages just because the license scraping doesn't
+ # work. Stick those special cases into PACKAGE_LICENSES.
+ # Packages should only be here because they are sub/split packages already
+ # covered by the license of the main package.
+
+ # These are Chrome-OS-specific packages, copyright BSD-Google
+ 'sys-kernel/chromeos-kernel', # already manually credit Linux
+]
+
+SKIPPED_LICENSES = [
+ # Some of our packages contain binary blobs for which we have special
+ # negotiated licenses, and no need to display anything publicly. Strongly
+ # consider using Google-TOS instead, if possible.
+ 'Proprietary-Binary',
+
+ # If you have an early repo for which license terms have yet to be decided
+ # use this. It will cause licensing for the package to be mostly ignored.
+ # Official builds should error for any package with this license.
+ 'TAINTED', # TODO(dgarrett): Error on official builds with this license.
+]
+
+LICENSE_NAMES_REGEX = [
+ r'^copyright$',
+ r'^copyright[.]txt$',
+ r'^copyright[.]regex$', # llvm
+ r'^copying.*$',
+ r'^licen[cs]e.*$',
+ r'^licensing.*$', # libatomic_ops
+ r'^ipa_font_license_agreement_v1[.]0[.]txt$', # ja-ipafonts
+ r'^PKG-INFO$', # copyright assignment for
+ # some python packages
+ # (netifaces, unittest2)
+]
+
+# These are _temporary_ license mappings for packages that do not have a valid
+# shared/custom license, or LICENSE file we can use.
+# Once this script runs earlier (during the package build process), it will
+# block new source without a LICENSE file if the ebuild contains a license
+# that requires copyright assignment (BSD and friends).
+# At that point, new packages will get fixed to include LICENSE instead of
+# adding workaround mappings like those below.
+# The way you now fix copyright attribution cases is to create a custom file
+# with the right license directly in COPYRIGHT_ATTRIBUTION_DIR.
+PACKAGE_LICENSES = {
+ # TODO: replace the naive license parsing code in this script with a hook
+ # into portage's license parsing. See http://crbug.com/348779
+
+ # Chrome (the browser) is complicated, it has a morphing license that is
+ # either BSD-Google, or BSD-Google,Google-TOS depending on how it was
+ # built. We bypass this problem for now by hardcoding the Google-TOS bit as
+ # per ChromeOS with non free bits
+ 'chromeos-base/chromeos-chrome': ['BSD-Google', 'Google-TOS'],
+
+ # Currently the code cannot parse LGPL-3 || ( LGPL-2.1 MPL-1.1 )
+ 'dev-python/pycairo': ['LGPL-3', 'LGPL-2.1'],
+}
+
+# Any license listed here that is found in the ebuild will make the code look
+# for license files inside the package source code in order to get copyright
+# attribution from them.
+COPYRIGHT_ATTRIBUTION_LICENSES = [
+ 'BSD', # requires distribution of copyright notice
+ 'BSD-2', # so does BSD-2 http://opensource.org/licenses/BSD-2-Clause
+ 'BSD-3', # and BSD-3? http://opensource.org/licenses/BSD-3-Clause
+ 'BSD-4', # and 4?
+ 'BSD-with-attribution',
+ 'MIT',
+ 'MIT-with-advertising',
+ 'Old-MIT',
+]
+
+# The following licenses are valid and can be shown as a (less helpful) stock
+# license, but it's better to look in the source code for a more specific
+# license if there is one; it is not an error if no better one is found.
+# Note that you don't want to set just anything here since any license here
+# will be included once in stock form and a second time in custom form if
+# found (there is no good way to know that a license we found on disk is the
+# better version of the stock version, so we show both).
+LOOK_IN_SOURCE_LICENSES = [
+ 'as-is', # The stock license is very vague, source always has more details.
+ 'PSF-2', # The custom license in python is more complete than the template.
+
+ # As far as I know, we have no requirement to do copyright attribution for
+ # these licenses, but the license included in the code has slightly better
+ # information than the stock Gentoo one (including copyright attribution).
+ 'BZIP2', # Single use license, do copyright attribution.
+ 'OFL', # Almost single use license, do copyright attribution.
+ 'OFL-1.1', # Almost single use license, do copyright attribution.
+ 'UoI-NCSA', # Only used by NCSA, might as well show their custom copyright.
+]
+
+# This used to provide overrides. I can't find a valid reason to add any more
+# here, though.
+PACKAGE_HOMEPAGES = {
+ # Example:
+ # 'x11-proto/glproto': ['http://www.x.org/'],
+}
+
+# These are tokens found in LICENSE= in an ebuild that aren't licenses we
+# can actually read from disk.
+# You should not use this to blacklist real licenses.
+LICENCES_IGNORE = [
+ ')', # Ignore OR tokens from LICENSE="|| ( LGPL-2.1 MPL-1.1 )"
+ '(',
+ '||',
+]
+
+TMPL = 'about_credits.tmpl'
+ENTRY_TMPL = 'about_credits_entry.tmpl'
+SHARED_LICENSE_TMPL = 'about_credits_shared_license_entry.tmpl'
+
+
+# This is called directly by src/repohooks/pre-upload.py
+def GetLicenseTypesFromEbuild(ebuild_path):
+ """Returns a list of license types from the ebuild file.
+
+ This function does not always return the correct list, but it is
+ faster than using portageq for not having to access chroot. It is
+ intended to be used for tasks such as presubmission checks.
+
+ Args:
+ ebuild_path: ebuild to read.
+
+ Returns:
+ list of licenses read from ebuild.
+
+ Raises:
+ ValueError: ebuild errors.
+ """
+ ebuild_env_tmpl = """
+has() { [[ " ${*:2} " == *" $1 "* ]]; }
+inherit() {
+ local overlay_list="%(overlay_list)s"
+ local eclass overlay f
+ for eclass; do
+ has ${eclass} ${_INHERITED_} && continue
+ _INHERITED_+=" ${eclass}"
+ for overlay in %(overlay_list)s; do
+ f="${overlay}/eclass/${eclass}.eclass"
+ if [[ -e ${f} ]]; then
+ source "${f}"
+ break
+ fi
+ done
+ done
+}
+source %(ebuild)s"""
+
+ # TODO: the overlay_list hard-coded here should be changed to look
+ # at the current overlay, and then the master overlays. E.g. for an
+ # ebuild file in overlay-parrot, we will look at parrot overlay
+ # first, and then look at portage-stable and chromiumos, which are
+ # listed as masters in overlay-parrot/metadata/layout.conf.
+ tmpl_env = {
+ 'ebuild': ebuild_path,
+ 'overlay_list': '%s %s' % (
+ os.path.join(constants.SOURCE_ROOT,
+ 'src/third_party/chromiumos-overlay'),
+ os.path.join(constants.SOURCE_ROOT,
+ 'src/third_party/portage-stable'))
+ }
+
+ with tempfile.NamedTemporaryFile(bufsize=0) as f:
+ osutils.WriteFile(f.name, ebuild_env_tmpl % tmpl_env)
+ env = osutils.SourceEnvironment(
+ f.name, whitelist=['LICENSE'], ifs=' ', multiline=True)
+
+ if not env.get('LICENSE'):
+ raise ValueError('No LICENSE found in the ebuild.')
+ if re.search(r'[,;]', env['LICENSE']):
+ raise ValueError(
+ 'LICENSE field in the ebuild should be whitespace-limited.')
+
+ return env['LICENSE'].split()
+
+
+class PackageLicenseError(Exception):
+ """Thrown if something fails while getting license information for a package.
+
+ This will cause the processing to error in the end.
+ """
+
+
+class PackageInfo(object):
+ """Package info containers, mostly for storing licenses."""
+
+ def __init__(self):
+
+ self.board = None
+ self.revision = None
+
+ # Array of scanned license texts.
+ self.license_text_scanned = []
+
+ self.category = None
+ self.name = None
+ self.version = None
+
+ # Looks something like this
+ # /mnt/host/source/src/
+ # third_party/portage-stable/net-misc/rsync/rsync-3.0.8.ebuild
+ self.ebuild_path = None
+
+ # Array of license names retrieved from ebuild or override in this code.
+ self.ebuild_license_names = []
+ self.homepages = []
+ # This contains licenses names we can read from Gentoo or custom licenses.
+ # These are supposed to be shared licenses (i.e. licenses referenced by
+ # more then one package), but after all processing, we may find out that
+ # some are only used once and they get taken out of the shared pool and
+ # pasted directly in the sole package that was using them (see
+ # GenerateHTMLLicenseOutput).
+ self.license_names = set()
+
+ # We set this if the ebuild has a BSD/MIT like license that requires
+ # scanning for a LICENSE file in the source code, or a static mapping
+ # in PACKAGE_LICENSES. Not finding one once this is set, is fatal.
+ self.need_copyright_attribution = False
+ # This flag just says we'd like to include licenses from the source, but
+ # not finding any is not fatal.
+ self.scan_source_for_licenses = False
+
+ # After reading basic package information, we can mark the package as
+ # one to skip in licensing.
+ self.skip = False
+
+ # If we failed to get licensing for this package, mark it as such so that
+ # it can be flagged when the full license file is being generated.
+ self.licensing_failed = False
+
+ # If we are called from a hook, we grab package info from the soure tree.
+ # This is also used as a flag to know whether we should do package work
+ # based on an installed package, or one that is being built and we got
+ # called from the hook.
+ self.build_source_tree = None
+
+ @property
+ def fullnamerev(self):
+ s = '%s-%s' % (self.fullname, self.version)
+ if self.revision:
+ s += '-r%s' % self.revision
+ return s
+
+ @property
+ def fullname(self):
+ return '%s/%s' % (self.category, self.name)
+
+ @property
+ def license_dump_path(self):
+ """e.g. /build/x86-alex//var/db/pkg/sys-apps/dtc-1.4.0/license.yaml."""
+ return "%s/%s/%s/license.yaml" % (cros_build_lib.GetSysroot(self.board),
+ PER_PKG_LICENSE_DIR, self.fullnamerev)
+
+ def _BuildInfo(self, filename):
+ filename = '%s/build-info/%s' % (self.build_source_tree, filename)
+ # Buildinfo properties we read are in US-ASCII, not Unicode.
+ try:
+ bi = open(filename).read().rstrip()
+ # Some properties like HOMEPAGE may be absent.
+ except IOError:
+ bi = ""
+ return bi
+
+ def _RunEbuildPhases(self, phases):
+ """Run a list of ebuild phases on an ebuild.
+
+ Args:
+ phases: list of phases like ['clean', 'fetch'] or ['unpack'].
+
+ Returns:
+ ebuild command output
+ """
+
+ return cros_build_lib.RunCommand(
+ ['ebuild-%s' % self.board, self.ebuild_path] + phases, print_cmd=debug,
+ redirect_stdout=True)
+
+ def _GetOverrideLicense(self):
+ """Look in COPYRIGHT_ATTRIBUTION_DIR for license with copyright attribution.
+
+ For dev-util/bsdiff-4.3-r5, the code will look for
+ dev-util/bsdiff-4.3-r5
+ dev-util/bsdiff-4.3
+ dev-util/bsdiff
+
+ It is ok to have more than one bsdiff license file, and an empty file acts
+ as a rubout (i.e. an empty dev-util/bsdiff-4.4 will shadow dev-util/bsdiff
+ and tell the licensing code to look in the package source for a license
+ instead of using dev-util/bsdiff as an override).
+
+ Returns:
+ False (no license found) or a multiline license string.
+ """
+ license_read = None
+ # dev-util/bsdiff-4.3-r5 -> bsdiff-4.3-r5
+ filename = os.path.basename(self.fullnamerev)
+ license_path = os.path.join(COPYRIGHT_ATTRIBUTION_DIR,
+ os.path.dirname(self.fullnamerev))
+ pv = portage_utilities.SplitPV(filename)
+ pv_no_rev = '%s-%s' % (pv.package, pv.version_no_rev)
+ for filename in (pv.pv, pv_no_rev, pv.package):
+ file_path = os.path.join(license_path, filename)
+ logging.debug("Looking for override copyright attribution license in %s",
+ file_path)
+ if os.path.exists(file_path):
+ # Turn
+ # /../merlin/trunk/src/third_party/chromiumos-overlay/../dev-util/bsdiff
+ # into
+ # chromiumos-overlay/../dev-util/bsdiff
+ short_dir_path = os.path.join(*file_path.rsplit(os.path.sep, 5)[1:])
+ license_read = "Copyright Attribution License %s:\n\n" % short_dir_path
+ license_read += ReadUnknownEncodedFile(
+ file_path, "read copyright attribution license")
+ break
+
+ return license_read
+
+ def _ExtractLicenses(self):
+ """Scrounge for text licenses in the source of package we'll unpack.
+
+ This is only called if we couldn't get usable licenses from the ebuild,
+ or one of them is BSD/MIT like which forces us to look for a file with
+ copyright attribution in the source code itself.
+
+ First, we have a shortcut where we scan COPYRIGHT_ATTRIBUTION_DIR to see if
+ we find a license for this package. If so, we use that.
+ Typically it'll be used if the unpacked source does not have the license
+ that we're required to display for copyright attribution (in some cases it's
+ plain absent, in other cases, it could be in a filename we don't look for).
+
+ Otherwise, we scan the unpacked source code for what looks like license
+ files as defined in LICENSE_NAMES_REGEX.
+
+ Raises:
+ AssertionError: on runtime errors
+ PackageLicenseError: couldn't find copyright attribution file.
+ """
+ license_override = self._GetOverrideLicense()
+ if license_override:
+ self.license_text_scanned = [license_override]
+ return
+
+ if self.build_source_tree:
+ workdir = "%s/work" % self.build_source_tree
+ else:
+ self._RunEbuildPhases(['clean', 'fetch'])
+ output = self._RunEbuildPhases(['unpack']).output.splitlines()
+ # Output is spammy, it looks like this:
+ # * gc-7.2d.tar.gz RMD160 SHA1 SHA256 size ;-) ... [ ok ]
+ # * checking gc-7.2d.tar.gz ;-) ... [ ok ]
+ # * Running stacked hooks for pre_pkg_setup
+ # * sysroot_build_bin_dir ...
+ # [ ok ]
+ # * Running stacked hooks for pre_src_unpack
+ # * python_multilib_setup ...
+ # [ ok ]
+ # >>> Unpacking source...
+ # >>> Unpacking gc-7.2d.tar.gz to /build/x86-alex/tmp/po/[...]ps-7.2d/work
+ # >>> Source unpacked in /build/x86-alex/tmp/portage/[...]ops-7.2d/work
+ # So we only keep the last 2 lines, the others we don't care about.
+ output = [line for line in output if line[0:3] == ">>>" and
+ line != ">>> Unpacking source..."]
+ for line in output:
+ logging.info(line)
+
+ args = ['portageq-%s' % self.board, 'envvar', 'PORTAGE_TMPDIR']
+ result = cros_build_lib.RunCommand(args, print_cmd=debug,
+ redirect_stdout=True)
+ tmpdir = result.output.splitlines()[0]
+ # tmpdir gets something like /build/daisy/tmp/
+ workdir = os.path.join(tmpdir, 'portage', self.fullnamerev, 'work')
+
+ if not os.path.exists(workdir):
+ raise AssertionError("Unpack of %s didn't create %s. Version mismatch" %
+ (self.fullnamerev, workdir))
+
+ # You may wonder how deep should we go?
+ # In case of packages with sub-packages, it could be deep.
+ # Let's just be safe and get everything we can find.
+ # In the case of libatomic_ops, it's actually required to look deep
+ # to find the MIT license:
+ # dev-libs/libatomic_ops-7.2d/work/gc-7.2/libatomic_ops/doc/LICENSING.txt
+ args = ['find', workdir, '-type', 'f']
+ result = cros_build_lib.RunCommand(args, print_cmd=debug,
+ redirect_stdout=True).output.splitlines()
+ # Truncate results to look like this: swig-2.0.4/COPYRIGHT
+ files = [x[len(workdir):].lstrip('/') for x in result]
+ license_files = []
+ for name in files:
+ # When we scan a source tree managed by git, this can contain license
+ # files that are not part of the source. Exclude those.
+ # (e.g. .git/refs/heads/licensing)
+ if ".git/" in name:
+ continue
+ basename = os.path.basename(name)
+ # Looking for license.* brings up things like license.gpl, and we
+ # never want a GPL license when looking for copyright attribution,
+ # so we skip them here. We also skip regexes that can return
+ # license.py (seen in some code).
+ if re.search(r".*GPL.*", basename) or re.search(r"\.py$", basename):
+ continue
+ for regex in LICENSE_NAMES_REGEX:
+ if re.search(regex, basename, re.IGNORECASE):
+ license_files.append(name)
+ break
+
+ if not license_files:
+ if self.need_copyright_attribution:
+ logging.error("""
+%s: unable to find usable license.
+Typically this will happen because the ebuild says it's MIT or BSD, but there
+was no license file that this script could find to include along with a
+copyright attribution (required for BSD/MIT).
+
+If this is Google source, please change
+LICENSE="BSD"
+to
+LICENSE="BSD-Google"
+
+If not, go investigate the unpacked source in %s,
+and find which license to assign. Once you found it, you should copy that
+license to a file under %s
+(or you can modify LICENSE_NAMES_REGEX to pickup a license file that isn't
+being scraped currently).""",
+ self.fullnamerev, workdir, COPYRIGHT_ATTRIBUTION_DIR)
+ raise PackageLicenseError()
+ else:
+ # We can get called for a license like as-is where it's preferable
+ # to find a better one in the source, but not fatal if we didn't.
+ logging.info("Was not able to find a better license for %s "
+ "in %s to replace the more generic one from ebuild",
+ self.fullnamerev, workdir)
+
+ # Examples of multiple license matches:
+ # dev-lang/swig-2.0.4-r1: swig-2.0.4/COPYRIGHT swig-2.0.4/LICENSE
+ # dev-libs/glib-2.32.4-r1: glib-2.32.4/COPYING pkg-config-0.26/COPYING
+ # dev-libs/libnl-3.2.14: libnl-doc-3.2.14/COPYING libnl-3.2.14/COPYING
+ # dev-libs/libpcre-8.30-r2: pcre-8.30/LICENCE pcre-8.30/COPYING
+ # dev-libs/libusb-0.1.12-r6: libusb-0.1.12/COPYING libusb-0.1.12/LICENSE
+ # dev-libs/pyzy-0.1.0-r1: db/COPYING pyzy-0.1.0/COPYING
+ # net-misc/strongswan-5.0.2-r4: strongswan-5.0.2/COPYING
+ # strongswan-5.0.2/LICENSE
+ # sys-process/procps-3.2.8_p11: debian/copyright procps-3.2.8/COPYING
+ logging.info('License(s) for %s: %s', self.fullnamerev,
+ ' '.join(license_files))
+ for license_file in sorted(license_files):
+ # Joy and pink ponies. Some license_files are encoded as latin1 while
+ # others are utf-8 and of course you can't know but only guess.
+ license_path = os.path.join(workdir, license_file)
+ license_txt = ReadUnknownEncodedFile(license_path, "Adding License")
+
+ self.license_text_scanned += [
+ "Scanned Source License %s:\n\n%s" % (license_file, license_txt)]
+
+ # We used to clean up here, but there have been many instances where
+ # looking at unpacked source to see where the licenses were, was useful
+ # so let's disable this for now
+ # self._RunEbuildPhases(['clean'])
+
+ def GetPackageInfo(self, fullnamewithrev):
+ """Populate PackageInfo with package license, and homepage.
+
+ self.ebuild_license_names will not be filled if the package is skipped
+ or if there was an issue getting data from the ebuild.
+ self.license_names will only get the licenses that we can paste
+ as shared licenses.
+ scan_source_for_licenses will be set if we should unpack the source to look
+ for licenses
+ if need_copyright_attribution is also set, not finding a license in the
+ source is fatal (PackageLicenseError will get raised).
+
+ Args:
+ fullnamewithrev: e.g. dev-libs/libatomic_ops-7.2d
+
+ Raises:
+ AssertionError: on runtime errors
+ """
+ if not fullnamewithrev:
+ if not self.build_source_tree:
+ raise AssertionError("Cannot continue without full name or source tree")
+ fullnamewithrev = "%s/%s" % (self._BuildInfo("CATEGORY"),
+ self._BuildInfo("PF"))
+ logging.debug("Computed package name %s from %s", fullnamewithrev,
+ self.build_source_tree)
+
+ try:
+ cpv = portage_utilities.SplitCPV(fullnamewithrev)
+ # A bad package can either raise a TypeError exception or return None,
+ # so we catch both cases.
+ if not cpv:
+ raise TypeError
+ except TypeError:
+ raise AssertionError("portage couldn't find %s, missing version number?" %
+ fullnamewithrev)
+
+ self.category, self.name, self.version, self.revision = (
+ cpv.category, cpv.package, cpv.version_no_rev, cpv.rev)
+
+ if self.revision is not None:
+ self.revision = str(self.revision).lstrip('r')
+ if self.revision == '0':
+ self.revision = None
+
+ if self.category in SKIPPED_CATEGORIES:
+ logging.info("%s in SKIPPED_CATEGORIES, skip package", self.fullname)
+ self.skip = True
+ return
+
+ if self.fullname in SKIPPED_PACKAGES:
+ logging.info("%s in SKIPPED_PACKAGES, skip package", self.fullname)
+ self.skip = True
+ return
+
+ def _FindEbuildPath(self):
+ """Populate package info from an ebuild retrieved via equery."""
+ # By default, equery returns the latest version of the package. A
+ # build may have used an older version than what is currently
+ # available in the source tree (a build dependency can be pinned
+ # to an older version of a package for compatibility
+ # reasons). Therefore we need to tell equery that we want the
+ # exact version number used in the image build as opposed to the
+ # latest available in the source tree.
+ args = ['equery-%s' % self.board, '-q', '-C', 'which', self.fullnamerev]
+ try:
+ path = cros_build_lib.RunCommand(args, print_cmd=True,
+ redirect_stdout=True).output.strip()
+ if not path:
+ raise AssertionError
+ except:
+ raise AssertionError('GetEbuildPath for %s failed.\n'
+ 'Is your tree clean? Delete %s and rebuild' %
+ (self.name,
+ cros_build_lib.GetSysroot(board=self.board)))
+ logging.debug("%s -> %s", " ".join(args), path)
+
+ if not os.access(path, os.F_OK):
+ raise AssertionError("Can't access %s", path)
+
+ self.ebuild_path = path
+
+ def _ReadEbuildMetadata(self):
+ """Read package metadata retrieved via portageq."""
+ args = ['portageq-%s' % self.board, 'metadata',
+ cros_build_lib.GetSysroot(board=self.board), 'ebuild',
+ self.fullnamerev, 'HOMEPAGE', 'LICENSE']
+ tmp = cros_build_lib.RunCommand(args, print_cmd=debug,
+ redirect_stdout=True)
+ lines = tmp.output.splitlines()
+ # Runs:
+ # portageq metadata /build/x86-alex ebuild net-misc/wget-1.12-r2 \
+ # HOMEPAGE LICENSE
+ # Returns:
+ # http://www.gnu.org/software/wget/
+ # GPL-3
+ self.homepages, self.ebuild_license_names = (
+ lines[0].split(), lines[1].split())
+
+ def _TestEbuildContents(self):
+ """Discover if the ebuild installed any files.
+
+ Returns:
+ bool which tells if any files were installed.
+ """
+ # Search for anything the ebuild might install, other than a directory.
+ args = ['equery-%s' % self.board, '-q', '-C', 'files', self.fullnamerev,
+ '-f', 'obj']
+ tmp = cros_build_lib.RunCommand(args, print_cmd=debug, redirect_stdout=True)
+ lines = tmp.output.splitlines()
+
+ # lines is an array of the file names installed by the ebuild.
+ return bool(lines)
+
+ def GetLicenses(self):
+ """Get licenses from the ebuild field and the unpacked source code.
+
+ Some packages have static license mappings applied to them that get
+ retrieved from the ebuild.
+
+ For others, we figure out whether the package source should be scanned to
+ add licenses found there.
+
+ Raises:
+ AssertionError: on runtime errors
+ PackageLicenseError: couldn't find license in ebuild and source.
+ """
+ if self.build_source_tree:
+ # If the total size installed is zero, we installed no content to license.
+ if self._BuildInfo("SIZE").strip() == '0':
+ self.skip = True
+ return
+ self.homepages = self._BuildInfo("HOMEPAGE").split()
+ self.ebuild_license_names = self._BuildInfo("LICENSE").split()
+ else:
+ self._FindEbuildPath()
+ self._ReadEbuildMetadata()
+ self.skip = self.skip or not self._TestEbuildContents()
+
+ # If this ebuild only uses skipped licenses, skip it.
+ if (self.ebuild_license_names and
+ all(l in SKIPPED_LICENSES for l in self.ebuild_license_names)):
+ self.skip = True
+
+ if self.skip:
+ return
+
+ if self.fullname in PACKAGE_HOMEPAGES:
+ self.homepages = PACKAGE_HOMEPAGES[self.fullname]
+
+ # Packages with missing licenses or licenses that need mapping (like
+ # BSD/MIT) are hardcoded here:
+ if self.fullname in PACKAGE_LICENSES:
+ self.ebuild_license_names = PACKAGE_LICENSES[self.fullname]
+ logging.info("Static license mapping for %s: %s", self.fullnamerev,
+ ",".join(self.ebuild_license_names))
+ else:
+ logging.info("Read licenses for %s: %s", self.fullnamerev,
+ ",".join(self.ebuild_license_names))
+
+ # Lots of packages in chromeos-base have their license set to BSD instead
+ # of BSD-Google:
+ new_license_names = []
+ for license_name in self.ebuild_license_names:
+ # TODO: temp workaround for http;//crbug.com/348750 , remove when the bug
+ # is fixed.
+ if (license_name == "BSD" and
+ self.fullnamerev.startswith("chromeos-base/")):
+ license_name = "BSD-Google"
+ logging.error(
+ "Fixed BSD->BSD-Google for %s because it's in chromeos-base. "
+ "Please fix the LICENSE field in the ebuild", self.fullnamerev)
+ # TODO: temp workaround for http;//crbug.com/348749 , remove when the bug
+ # is fixed.
+ if license_name == "Proprietary":
+ license_name = "Google-TOS"
+ logging.error(
+ "Fixed Proprietary -> Google-TOS for %s. "
+ "Please fix the LICENSE field in the ebuild", self.fullnamerev)
+ new_license_names.append(license_name)
+ self.ebuild_license_names = new_license_names
+
+ # The ebuild license field can look like:
+ # LICENSE="GPL-3 LGPL-3 Apache-2.0" (this means AND, as in all 3)
+ # for third_party/portage-stable/app-admin/rsyslog/rsyslog-5.8.11.ebuild
+ # LICENSE="|| ( LGPL-2.1 MPL-1.1 )"
+ # for third_party/portage-stable/x11-libs/cairo/cairo-1.8.8.ebuild
+
+ # The parser isn't very smart and only has basic support for the
+ # || ( X Y ) OR logic to do the following:
+ # In order to save time needlessly unpacking packages and looking or a
+ # cleartext license (which is really a crapshoot), if we have a license
+ # like BSD that requires looking for copyright attribution, but we can
+ # chose another license like GPL, we do that.
+
+ if not self.skip and not self.ebuild_license_names:
+ logging.error("%s: no license found in ebuild. FIXME!", self.fullnamerev)
+ # In a bind, you could comment this out. I'm making the output fail to
+ # get your attention since this error really should be fixed, but if you
+ # comment out the next line, the script will try to find a license inside
+ # the source.
+ raise PackageLicenseError()
+
+ # This is not invalid, but the parser can't deal with it, so if it ever
+ # happens, error out to tell the programmer to do something.
+ # dev-python/pycairo-1.10.0-r4: LGPL-3 || ( LGPL-2.1 MPL-1.1 )
+ if "||" in self.ebuild_license_names[1:]:
+ logging.error("%s: Can't parse || in the middle of a license: %s",
+ self.fullnamerev, ' '.join(self.ebuild_license_names))
+ raise PackageLicenseError()
+
+ or_licenses_and_one_is_no_attribution = False
+ # We do a quick early pass first so that the longer pass below can
+ # run accordingly.
+ for license_name in [x for x in self.ebuild_license_names
+ if x not in LICENCES_IGNORE]:
+ # Here we have an OR case, and one license that we can use stock, so
+ # we remember that in order to be able to skip license attributions if
+ # any were in the OR.
+ if (self.ebuild_license_names[0] == "||" and
+ license_name not in COPYRIGHT_ATTRIBUTION_LICENSES):
+ or_licenses_and_one_is_no_attribution = True
+
+ for license_name in [x for x in self.ebuild_license_names
+ if x not in LICENCES_IGNORE]:
+ # Licenses like BSD or MIT can't be used as is because they do not contain
+ # copyright self. They have to be replaced by copyright file given in the
+ # source code, or manually mapped by us in PACKAGE_LICENSES
+ if license_name in COPYRIGHT_ATTRIBUTION_LICENSES:
+ # To limit needless efforts, if a package is BSD or GPL, we ignore BSD
+ # and use GPL to avoid scanning the package, but we can only do this if
+ # or_licenses_and_one_is_no_attribution has been set above.
+ # This ensures that if we have License: || (BSD3 BSD4), we will
+ # look in the source.
+ if or_licenses_and_one_is_no_attribution:
+ logging.info("%s: ignore license %s because ebuild LICENSES had %s",
+ self.fullnamerev, license_name,
+ ' '.join(self.ebuild_license_names))
+ else:
+ logging.info("%s: can't use %s, will scan source code for copyright",
+ self.fullnamerev, license_name)
+ self.need_copyright_attribution = True
+ self.scan_source_for_licenses = True
+ else:
+ self.license_names.add(license_name)
+ # We can't display just 2+ because it only contains text that says to
+ # read v2 or v3.
+ if license_name == 'GPL-2+':
+ self.license_names.add('GPL-2')
+ if license_name == 'LGPL-2+':
+ self.license_names.add('LGPL-2')
+
+ if license_name in LOOK_IN_SOURCE_LICENSES:
+ logging.info("%s: Got %s, will try to find better license in source...",
+ self.fullnamerev, license_name)
+ self.scan_source_for_licenses = True
+
+ if self.license_names:
+ logging.info('%s: using stock|cust license(s) %s',
+ self.fullnamerev, ','.join(self.license_names))
+
+ # If the license(s) could not be found, or one requires copyright
+ # attribution, dig in the source code for license files:
+ # For instance:
+ # Read licenses from ebuild for net-dialup/ppp-2.4.5-r3: BSD,GPL-2
+ # We need get the substitution file for BSD and add it to GPL.
+ if self.scan_source_for_licenses:
+ self._ExtractLicenses()
+
+ # This shouldn't run, but leaving as sanity check.
+ if not self.license_names and not self.license_text_scanned:
+ raise AssertionError("Didn't find usable licenses for %s" %
+ self.fullnamerev)
+
+
class Licensing(object):
  """Do the actual work of extracting licensing info and outputting html."""

  def __init__(self, board, package_fullnames, gen_licenses):
    """Initialize the license collector.

    Args:
      board: board name, eg x86-alex.
      package_fullnames: iterable of package names (with version) to process.
      gen_licenses: if true, (re)generate per-package license dumps instead
        of only relying on the ones produced at package build time.
    """
    # eg x86-alex
    self.board = board
    # List of stock and custom licenses referenced in ebuilds. Used to
    # print a report. Dict value says which packages use that license.
    self.licenses = {}

    # Licenses are supposed to be generated at package build time and be
    # ready for us, but in case they're not, they can be generated.
    self.gen_licenses = gen_licenses

    # This keeps track of whether we have an incomplete license file due to
    # package errors during parsing.
    # Any non empty list at the end shows the list of packages that caused
    # errors.
    self.incomplete_packages = []

    self.package_text = {}
    self.entry_template = None

    # We need to have a dict for the list of packages objects, index by package
    # fullnamerev, so that when we scan our licenses at the end, and find out
    # some shared licenses are only used by one package, we can access that
    # package object by name, and add the license directly in that object.
    self.packages = {}
    self._package_fullnames = package_fullnames

  @property
  def sorted_licenses(self):
    """Shared license names, sorted case-insensitively."""
    return sorted(self.licenses.keys(), key=str.lower)

  def _SaveLicenseDump(self, pkg):
    """Write all attributes of pkg to its license.yaml dump file.

    Args:
      pkg: PackageInfo object to serialize.
    """
    if pkg.build_source_tree:
      save_file = "%s/build-info/license.yaml" % pkg.build_source_tree
    else:
      save_file = pkg.license_dump_path
    logging.debug("Saving license to %s", save_file)
    save_dir = os.path.dirname(save_file)
    if not os.path.isdir(save_dir):
      # 0o755 is the octal spelling accepted by both Python 2.6+ and 3.
      os.makedirs(save_dir, 0o755)
    # The dump is a list of [attribute name, value] pairs.
    yaml_dump = [[key, value] for key, value in pkg.__dict__.items()]
    with open(save_file, "w") as f:
      f.write(yaml.dump(yaml_dump))

  def _LoadLicenseDump(self, pkg):
    """Overwrite the attributes of pkg with those stored in its dump file.

    Args:
      pkg: PackageInfo object whose license_dump_path is read.
    """
    save_file = pkg.license_dump_path
    logging.debug("Getting license from %s for %s", save_file, pkg.name)
    with open(save_file, "r") as f:
      # yaml.safe_load barfs on unicode it output, but we don't really need it.
      # NOTE(review): yaml.load can construct arbitrary Python objects; this
      # is only acceptable as long as the dump files are produced by
      # _SaveLicenseDump and not writable by untrusted parties.
      yaml_dump = yaml.load(f)
    for key, value in yaml_dump:
      pkg.__dict__[key] = value

  def LicensedPackages(self, license_name):
    """Return list of packages using a given license."""
    return self.licenses[license_name]

  def LoadPackageInfo(self, board):
    """Populate basic package info for all packages from their ebuild.

    Args:
      board: board name assigned to every created PackageInfo.
    """
    for package_name in self._package_fullnames:
      pkg = PackageInfo()
      pkg.board = board
      pkg.GetPackageInfo(package_name)
      self.packages[package_name] = pkg

  def HookPackageProcess(self, pkg_build_path):
    """Different entry point to populate a packageinfo.

    This is called instead of LoadPackageInfo when called by a package build.
    The resulting license dump is written under pkg_build_path/build-info.

    Args:
      pkg_build_path: unpacked being built by emerge.
    """
    pkg = PackageInfo()
    pkg.build_source_tree = pkg_build_path
    pkg.GetPackageInfo(None)
    if not pkg.skip:
      pkg.GetLicenses()
    self._SaveLicenseDump(pkg)

  def ProcessPackageLicenses(self):
    """Iterate through all packages provided and gather their licenses.

    GetLicenses will scrape licenses from the code and/or gather stock license
    names. We gather the list of stock and custom ones for later processing.

    Do not call this after adding virtual packages with AddExtraPkg.
    """
    for package_name in self.packages:
      pkg = self.packages[package_name]
      if pkg.skip:
        if self.gen_licenses:
          logging.info("Package %s is in skip list", package_name)
        else:
          # If we do a licensing run expecting to get licensing objects from
          # an image build, virtual packages will be missing such objects
          # because virtual packages do not get the install hook run at build
          # time. Because this script may not have permissions to write in the
          # /var/db/ directory, we don't want it to generate useless license
          # bits for virtual packages. As a result, ignore virtual packages
          # here.
          if pkg.category == "virtual":
            logging.debug("Ignoring %s virtual package", package_name)
            continue

      # Other skipped packages get dumped with incomplete info and the skip flag
      dump_exists = os.path.exists(pkg.license_dump_path)
      if not dump_exists and not self.gen_licenses:
        logging.warning(">>> License for %s is missing, creating now <<<",
                        package_name)
      if not dump_exists or self.gen_licenses:
        if not pkg.skip:
          try:
            pkg.GetLicenses()
          except PackageLicenseError:
            pkg.licensing_failed = True
        # We dump packages where licensing failed too.
        self._SaveLicenseDump(pkg)

    # To debug the code, we force the data to be re-read from the dumps
    # instead of reusing what we may have in memory.
    for package_name in self.packages:
      pkg = self.packages[package_name]
      if pkg.category == "virtual":
        continue

      self._LoadLicenseDump(pkg)
      logging.debug("loaded dump for %s", pkg.fullnamerev)
      if pkg.skip:
        logging.info("Package %s is in skip list", pkg.fullnamerev)
      if pkg.licensing_failed:
        logging.info("Package %s failed licensing", pkg.fullnamerev)
        self.incomplete_packages.append(pkg.fullnamerev)

  def AddExtraPkg(self, pkg_data):
    """Allow adding pre-created virtual packages.

    GetLicenses will not work on them, so add them after having run
    ProcessPackageLicenses.

    Args:
      pkg_data: sequence of (category, name, version, homepages list,
        license names list).
    """
    pkg = PackageInfo()
    pkg.board = self.board
    pkg.category = pkg_data[0]
    pkg.name = pkg_data[1]
    pkg.version = pkg_data[2]
    pkg.homepages = pkg_data[3]      # this is a list
    # Copy the list for each attribute: GenerateHTMLLicenseOutput removes
    # entries from license_names, which must not also alter
    # ebuild_license_names or the caller's pkg_data.
    pkg.license_names = list(pkg_data[4])
    pkg.ebuild_license_names = list(pkg_data[4])
    self.packages[pkg.fullnamerev] = pkg

  # Called directly by src/repohooks/pre-upload.py
  @staticmethod
  def FindLicenseType(license_name):
    """Says if a license is stock Gentoo, custom, or doesn't exist.

    Args:
      license_name: name of the license file to look for.

    Returns:
      "Gentoo Package Stock" if found in STOCK_LICENSE_DIRS, "Custom" if found
      in CUSTOM_LICENSE_DIRS or listed in SKIPPED_LICENSES.

    Raises:
      AssertionError: the license could not be found anywhere.
    """
    for directory in STOCK_LICENSE_DIRS:
      path = '%s/%s' % (directory, license_name)
      if os.path.exists(path):
        return "Gentoo Package Stock"

    for directory in CUSTOM_LICENSE_DIRS:
      path = '%s/%s' % (directory, license_name)
      if os.path.exists(path):
        return "Custom"

    if license_name in SKIPPED_LICENSES:
      return "Custom"

    raise AssertionError("""
license %s could not be found in %s
If the license in the ebuild is correct,
a) a stock license should be added to portage-stable/licenses :
running `cros_portage_upgrade` inside of the chroot should clone this repo
to /tmp/portage/:
https://chromium.googlesource.com/chromiumos/overlays/portage/+/gentoo
find the new licenses under licenses, and add them to portage-stable/licenses

b) if it's a non gentoo package with a custom license, you can copy that license
to third_party/chromiumos-overlay/licenses/

Try re-running the script with -p cat/package-ver --generate
after fixing the license.""" %
                         (license_name,
                          '\n'.join(STOCK_LICENSE_DIRS + CUSTOM_LICENSE_DIRS))
                        )

  @staticmethod
  def ReadSharedLicense(license_name):
    """Read and return stock or cust license file specified in an ebuild.

    Args:
      license_name: name of the license file to read.

    Returns:
      The content of the first matching file, searching STOCK_LICENSE_DIRS
      before CUSTOM_LICENSE_DIRS.

    Raises:
      AssertionError: no such license file exists.
    """
    for directory in STOCK_LICENSE_DIRS + CUSTOM_LICENSE_DIRS:
      license_path = os.path.join(directory, license_name)
      if os.path.exists(license_path):
        return ReadUnknownEncodedFile(license_path, "read license")

    raise AssertionError("license %s could not be found in %s"
                         % (license_name,
                            '\n'.join(STOCK_LICENSE_DIRS +
                                      CUSTOM_LICENSE_DIRS))
                        )

  @staticmethod
  def EvaluateTemplate(template, env):
    """Expand a template with vars like {{foo}} using a dict of expansions.

    Args:
      template: template text.
      env: dict mapping variable name to its replacement string.

    Returns:
      The template with every {{name}} occurrence replaced.
    """
    # TODO switch to stock python templates.
    # items() (rather than iteritems()) works on both Python 2 and 3.
    for key, val in env.items():
      template = template.replace('{{%s}}' % key, val)
    return template

  def _GeneratePackageLicenseText(self, pkg):
    """Concatenate all licenses related to a pkg.

    This means a combination of ebuild shared licenses and licenses read from
    the pkg source tree, if any. The rendered entry is stored in
    self.package_text[pkg].

    Args:
      pkg: PackageInfo object

    Raises:
      AssertionError: on runtime errors
    """
    license_text = []
    for license_text_scanned in pkg.license_text_scanned:
      license_text.append(license_text_scanned)
      license_text.append('%s\n' % ('-=' * 40))

    license_pointers = []
    # sln: shared license name.
    for sln in pkg.license_names:
      # Says whether it's a stock gentoo or custom license.
      license_type = self.FindLicenseType(sln)
      license_pointers.append(
          "<li><a href='#%s'>%s License %s</a></li>" % (
              sln, license_type, sln))

    # This should get caught earlier, but one extra check.
    if not license_text + license_pointers:
      raise AssertionError('Ended up with no license_text for %s' % pkg.name)

    env = {
        'name': "%s-%s" % (pkg.name, pkg.version),
        'url': cgi.escape(pkg.homepages[0]) if pkg.homepages else '',
        'licenses_txt': cgi.escape('\n'.join(license_text)) or '',
        'licenses_ptr': '\n'.join(license_pointers) or '',
    }
    self.package_text[pkg] = self.EvaluateTemplate(self.entry_template, env)

  def GenerateHTMLLicenseOutput(self, output_file,
                                output_template=TMPL,
                                entry_template=ENTRY_TMPL,
                                license_template=SHARED_LICENSE_TMPL):
    """Generate the combined html license file used in ChromeOS.

    Args:
      output_file: resulting HTML license output.
      output_template: template for the entire HTML file.
      entry_template: template for per package entries.
      license_template: template for shared license entries.
    """
    self.entry_template = ReadUnknownEncodedFile(entry_template)
    sorted_license_txt = []

    # Keep track of which licenses are used by which packages.
    for pkg in self.packages.values():
      if pkg.skip or pkg.licensing_failed:
        continue
      for sln in pkg.license_names:
        self.licenses.setdefault(sln, []).append(pkg.fullnamerev)

    # Find licenses only used once, and roll them in the package that uses them.
    # We iterate over a snapshot of the keys because licenses is modified in
    # the loop.
    for sln in list(self.licenses):
      if len(self.licenses[sln]) == 1:
        pkg_fullnamerev = self.licenses[sln][0]
        logging.info("Collapsing shared license %s into single use license "
                     "(only used by %s)", sln, pkg_fullnamerev)
        license_type = self.FindLicenseType(sln)
        license_txt = self.ReadSharedLicense(sln)
        single_license = "%s License %s:\n\n%s" % (license_type, sln,
                                                   license_txt)
        pkg = self.packages[pkg_fullnamerev]
        pkg.license_text_scanned.append(single_license)
        pkg.license_names.remove(sln)
        del self.licenses[sln]

    for pkg in sorted(self.packages.values(),
                      key=lambda x: (x.name.lower(), x.version, x.revision)):
      if pkg.skip:
        logging.debug("Skipping package %s", pkg.fullnamerev)
        continue
      if pkg.licensing_failed:
        logging.debug("Package %s failed licensing, skipping", pkg.fullnamerev)
        continue
      self._GeneratePackageLicenseText(pkg)
      sorted_license_txt.append(self.package_text[pkg])

    # Now generate the bottom of the page that will contain all the shared
    # licenses and a list of who is pointing to them.
    license_template = ReadUnknownEncodedFile(license_template)

    licenses_txt = []
    for license_name in self.sorted_licenses:
      env = {
          'license_name': license_name,
          'license': cgi.escape(self.ReadSharedLicense(license_name)),
          'license_type': self.FindLicenseType(license_name),
          'license_packages': ' '.join(self.LicensedPackages(license_name)),
      }
      licenses_txt.append(self.EvaluateTemplate(license_template, env))

    file_template = ReadUnknownEncodedFile(output_template)
    env = {
        'entries': '\n'.join(sorted_license_txt),
        'licenses': '\n'.join(licenses_txt),
    }
    osutils.WriteFile(output_file,
                      self.EvaluateTemplate(file_template, env).encode('UTF-8'))
+
+
def ListInstalledPackages(board, all_packages=False):
  """Return a list of all packages installed for a particular board.

  Args:
    board: board name; selects the equery-<board> / emerge-<board> wrapper
      that is run.
    all_packages: if True, every package visible in the build chroot is
      listed (equery list '*'). This is not what you want for a release
      license file, but it's a way to run licensing checks against all
      packages. If False, only the packages used for a release build (as
      determined by the dependencies of virtual/target-os) are listed.

  Returns:
    A list of package strings. With |all_packages| these are the raw output
    lines of equery; otherwise they are the package atoms extracted from the
    emerge --pretend output.

  Raises:
    AssertionError: if emerge reports a package destined for /build/ whose
      status flags are not a plain reinstall ('R').
  """
  if all_packages:
    # The following returns all packages that were part of the build tree
    # (many get built or used during the build, but do not get shipped).
    # Note that it also contains packages that are in the build as
    # defined by build_packages but not part of the image we ship.
    args = ["equery-%s" % board, "list", "*"]
    return cros_build_lib.RunCommand(args, print_cmd=debug,
                                     redirect_stdout=True
                                    ).output.splitlines()

  # The following asks emerge (in --pretend mode) for the full dependency
  # tree of virtual/target-os, build-time dependencies included.
  args = ["emerge-%s" % board, "--with-bdeps=y", "--usepkgonly",
          "--emptytree", "--pretend", "--color=n", "virtual/target-os"]
  emerge = cros_build_lib.RunCommand(args, print_cmd=debug,
                                     redirect_stdout=True).output.splitlines()
  # Another option which we've decided not to use, is bdeps=n. This outputs
  # just the packages we ship, but does not list the packages that were used
  # to build them, including a package like flex which generates a .a that is
  # included and shipped in ChromeOS.
  # We've decided to credit build packages, even if we're not legally required
  # to (it's always nice to do), and that way we get corner case packages like
  # flex. This is why we use bdeps=y and not bdeps=n.

  # [binary R ] x11-libs/libva-1.1.1 to /build/x86-alex/
  pkg_rgx = re.compile(r'\[[^]]+R[^]]+\] (.+) to /build/.*')
  # If we match something else without the 'R' like
  # [binary U ] chromeos-base/pepper-flash-13.0.0.133-r1 [12.0.0.77-r1]
  # this is bad and we should die on this.
  pkg_rgx2 = re.compile(r'(\[[^]]+\] .+) to /build/.*')

  packages = []
  for line in emerge:
    match = pkg_rgx.search(line)
    if match:
      packages.append(match.group(1))
      continue
    match2 = pkg_rgx2.search(line)
    if match2:
      # Build one message string; passing two arguments to the exception
      # would make it print as a tuple.
      raise AssertionError(
          "Package incorrectly installed, try eclean-%s\n%s" %
          (board, match2.group(1)))

  return packages
+
+
def _HandleIllegalXMLChars(text):
  """Strip whitelisted illegal XML characters and report the remaining ones.

  XML 1.0 acceptable character range:
  Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
           [#x10000-#x10FFFF]

  Whitelisted characters (currently only form feed, ^L) are removed from the
  text; any other illegal character is left in place and reported.

  Args:
    text: text to examine.

  Returns:
    A tuple of the filtered |text| and a list of the non-whitelisted illegal
    characters found in it.
  """
  # Remove the whitelisted characters first so they are never reported.
  cleaned = re.sub(u'[\x0c]', '', text)
  # The pattern below covers every illegal character (whitelisted or not),
  # so the whitelist above can grow without this pattern changing.
  offenders = re.findall(
      u'[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]', cleaned)
  return (cleaned, offenders)
+
+
def ReadUnknownEncodedFile(file_path, logging_text=None):
  """Read a file of unknown encoding, trying UTF-8 first and then latin1.

  Args:
    file_path: path of the file to read.
    logging_text: optional prefix; when set, an info line made of this text,
      the path and the encoding that was used is logged.

  Returns:
    The decoded file content, as filtered by _HandleIllegalXMLChars.

  Raises:
    ValueError: if _HandleIllegalXMLChars reports illegal XML characters
      in the file.
  """
  try:
    with codecs.open(file_path, encoding="utf-8") as utf8_file:
      contents = utf8_file.read()
      if logging_text:
        logging.info("%s %s (UTF-8)", logging_text, file_path)
  except UnicodeDecodeError:
    # Not valid UTF-8: read the file again as latin1.
    with codecs.open(file_path, encoding="latin1") as latin1_file:
      contents = latin1_file.read()
      if logging_text:
        logging.info("%s %s (latin1)", logging_text, file_path)

  contents, illegal_chars = _HandleIllegalXMLChars(contents)
  if not illegal_chars:
    return contents

  raise ValueError('Illegal XML characters %s found in %s.' %
                   (illegal_chars, file_path))