# Copyright 2020 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
#!/usr/bin/env python3
"""Starts project build on Google Cloud Builder.

Usage: build_project.py [--testing] [--test-image-suffix SUFFIX]
                        [--branch BRANCH] [--parallel] PROJECT [PROJECT ...]
"""

from __future__ import print_function

import argparse
import collections
import datetime
import json
import logging
import os
import posixpath
import re
import sys

from googleapiclient.discovery import build as cloud_build
import oauth2client.client
import six
import yaml

import build_lib

# Build type that main() passes to build_script_main(); run_build() uses the
# build type to tag the GCB build.
FUZZING_BUILD_TYPE = 'fuzzing'

# Value sent as 'logsBucket' in the GCB build request body.
GCB_LOGS_BUCKET = 'oss-fuzz-gcb-logs'

# Defaults that set_yaml_defaults() applies when project.yaml omits the
# corresponding key.
DEFAULT_ARCHITECTURES = ['x86_64']
DEFAULT_ENGINES = ['libfuzzer', 'afl', 'honggfuzz']
DEFAULT_SANITIZERS = ['address', 'undefined']

# Name suffix and content type of the "latest version" object that
# get_upload_steps() uploads next to each build archive.
LATEST_VERSION_FILENAME = 'latest.version'
LATEST_VERSION_CONTENT_TYPE = 'text/plain'

# Sent (with an 's' suffix) as 'queueTtl' in the GCB build request body.
QUEUE_TTL_SECONDS = 60 * 60 * 24  # 24 hours.

# Absolute path of the 'projects' directory. The first pardir strips this
# file's name, so the result is 'projects' three directories above the
# directory that contains this file.
PROJECTS_DIR = os.path.abspath(
    os.path.join(__file__, os.path.pardir, os.path.pardir, os.path.pardir,
                 os.path.pardir, 'projects'))

# GCB 'options' used by run_build() when the GCB_OPTIONS environment variable
# is not set.
DEFAULT_GCB_OPTIONS = {'machineType': 'N1_HIGHCPU_32'}

# Command line settings passed around as a single value: |testing| and
# |test_image_suffix| select testing buckets/images, |branch| is forwarded to
# build_lib.project_image_steps() and |parallel| enables 'waitFor' on steps.
Config = collections.namedtuple(
    'Config', ['testing', 'test_image_suffix', 'branch', 'parallel'])

# Matches a Dockerfile WORKDIR instruction (optionally indented) and captures
# its first whitespace-delimited argument.
WORKDIR_REGEX = re.compile(r'\s*WORKDIR\s*([^\s]+)')


class Build:  # pylint: disable=too-few-public-methods
  """One build configuration: a (fuzzing engine, sanitizer, architecture)
  combination, plus the name of the targets list file for its sanitizer."""

  def __init__(self, fuzzing_engine, sanitizer, architecture):
    self.fuzzing_engine = fuzzing_engine
    self.sanitizer = sanitizer
    self.architecture = architecture
    # The targets list filename depends only on the sanitizer.
    targets_list_filename = build_lib.get_targets_list_filename(self.sanitizer)
    self.targets_list_filename = targets_list_filename

  @property
  def out(self):
    """Returns the per-configuration output directory under /workspace/out/.
    This is always a POSIX path because it is used inside the build
    container."""
    dirname = '{}-{}-{}'.format(self.fuzzing_engine, self.sanitizer,
                                self.architecture)
    return posixpath.join('/workspace/out/', dirname)


def get_project_data(project_name):
  """Reads the project.yaml and Dockerfile of |project_name| from
  PROJECTS_DIR.

  Returns a (project.yaml contents, Dockerfile contents) tuple of strings.
  Logs an error and re-raises FileNotFoundError if the project has no
  Dockerfile. A missing project.yaml also raises FileNotFoundError (from
  open()), but without the log message."""
  project_dir = os.path.join(PROJECTS_DIR, project_name)

  # The Dockerfile is read first so that its absence is reported explicitly.
  try:
    with open(os.path.join(project_dir, 'Dockerfile')) as dockerfile_handle:
      dockerfile_contents = dockerfile_handle.read()
  except FileNotFoundError:
    logging.error('Project "%s" does not have a dockerfile.', project_name)
    raise

  with open(os.path.join(project_dir, 'project.yaml'), 'r') as yaml_handle:
    return yaml_handle.read(), dockerfile_contents


class Project:  # pylint: disable=too-many-instance-attributes
  """Class representing an OSS-Fuzz project.

  Built from the raw text of the project's project.yaml and Dockerfile. Keys
  missing from project.yaml are filled in by set_yaml_defaults(); 'language'
  has no default there, so a project.yaml without it raises KeyError."""

  def __init__(self, name, project_yaml_contents, dockerfile, image_project):
    """Parses |project_yaml_contents| (YAML text) and |dockerfile| (Dockerfile
    text) for the project called |name|. |image_project| is the GCR project
    that hosts the project's docker image."""
    project_yaml = yaml.safe_load(project_yaml_contents)
    self.name = name
    self.image_project = image_project
    self.workdir = workdir_from_dockerfile(dockerfile)
    set_yaml_defaults(project_yaml)
    # Kept raw; entries may be strings or dicts. See the |sanitizers| property.
    self._sanitizers = project_yaml['sanitizers']
    self.disabled = project_yaml['disabled']
    self.architectures = project_yaml['architectures']
    self.fuzzing_engines = project_yaml['fuzzing_engines']
    self.coverage_extra_args = project_yaml['coverage_extra_args']
    self.labels = project_yaml['labels']
    self.fuzzing_language = project_yaml['language']
    self.run_tests = project_yaml['run_tests']

  @property
  def sanitizers(self):
    """Returns the sanitizer names as a flat list of strings.

    Each entry of the raw list is either a sanitizer name or a dict whose keys
    are sanitizer names; dict values are ignored. Entries of any other type
    are skipped."""
    assert isinstance(self._sanitizers, list)
    processed_sanitizers = []
    for sanitizer in self._sanitizers:
      # This file is Python 3 only (it uses f-strings), so plain |str| is the
      # text type; no compatibility shim is needed.
      if isinstance(sanitizer, str):
        processed_sanitizers.append(sanitizer)
      elif isinstance(sanitizer, dict):
        # Iterating a dict yields its keys, in insertion order.
        processed_sanitizers.extend(sanitizer)

    return processed_sanitizers

  @property
  def image(self):
    """Returns the docker image for the project."""
    return f'gcr.io/{self.image_project}/{self.name}'


def get_last_step_id(steps):
  """Returns the 'id' of the final step in |steps|. |steps| must be non-empty
  and its last step must have an 'id'."""
  last_step = steps[-1]
  return last_step['id']


def set_yaml_defaults(project_yaml):
  """Sets project.yaml's default parameters.

  Mutates |project_yaml| (the parsed project.yaml dict) in place, adding a
  default for every optional key that is missing. Keys that are already
  present are left untouched."""
  project_yaml.setdefault('disabled', False)
  # Give each project its own copy of the list defaults. Handing out the
  # module-level lists themselves would let a mutation of one project's
  # settings silently change the defaults for every later project.
  project_yaml.setdefault('architectures', list(DEFAULT_ARCHITECTURES))
  project_yaml.setdefault('sanitizers', list(DEFAULT_SANITIZERS))
  project_yaml.setdefault('fuzzing_engines', list(DEFAULT_ENGINES))
  project_yaml.setdefault('run_tests', True)
  project_yaml.setdefault('coverage_extra_args', '')
  project_yaml.setdefault('labels', {})


def is_supported_configuration(build):
  """Returns True if |build|'s engine/sanitizer/architecture combination can
  be built. Raises KeyError if the fuzzing engine is not in
  build_lib.ENGINE_INFO."""
  engine_info = build_lib.ENGINE_INFO[build.fuzzing_engine]

  # i386 builds are only done with the address sanitizer.
  i386_without_asan = (build.architecture == 'i386' and
                       build.sanitizer != 'address')
  if i386_without_asan:
    return False

  if build.sanitizer not in engine_info.supported_sanitizers:
    return False
  return build.architecture in engine_info.supported_architectures


def workdir_from_dockerfile(dockerfile):
  """Returns the WORKDIR declared in |dockerfile| (the Dockerfile's text), or
  '/src' if it declares none. Every '$' in the result is doubled."""
  for line in dockerfile.split('\n'):
    match = WORKDIR_REGEX.match(line)
    if match is None:
      continue
    # NOTE(review): only the first WORKDIR instruction is used; any later ones
    # in the same Dockerfile are ignored.
    # We need to escape '$' since it is used for substitutions in Container
    # Builder builds.
    return match.group(1).replace('$', '$$')

  return '/src'


def get_datetime_now():
  """Returns the current local time as a naive datetime.datetime. Exists as a
  separate function so that it can be mocked."""
  now = datetime.datetime.now()
  return now


def get_env(fuzzing_language, build):
  """Returns the build environment as a sorted list of 'KEY=value' strings,
  suitable for use as the "env" parameter of a GCB build step. The values come
  from |fuzzing_language| and |build|."""
  variables = {
      'FUZZING_LANGUAGE': fuzzing_language,
      'FUZZING_ENGINE': build.fuzzing_engine,
      'SANITIZER': build.sanitizer,
      'ARCHITECTURE': build.architecture,
      # Set HOME so that it doesn't point to a persisted volume (see
      # https://github.com/google/oss-fuzz/issues/6035).
      'HOME': '/root',
      'OUT': build.out,
  }
  assignments = [f'{name}={value}' for name, value in variables.items()]
  assignments.sort()
  return assignments


def get_compile_step(project, build, env, parallel):
  """Returns the GCB step that compiles |project|'s fuzzers for the
  configuration given by |build|, using |env| as the step environment. If
  |parallel|, the step is made to wait only for the srcmap step."""
  banner = '*' * 80
  reproduce_commands = (
      f'python infra/helper.py build_image {project.name}\n'
      'python infra/helper.py build_fuzzers --sanitizer '
      f'{build.sanitizer} --engine {build.fuzzing_engine} --architecture '
      f'{build.architecture} {project.name}\n')
  failure_msg = (f'{banner}\nFailed to build.\nTo reproduce, run:\n'
                 f'{reproduce_commands}{banner}')

  # Remove /out to make sure no non-instrumented binaries are left in it.
  # `cd /src && cd {workdir}` (where {workdir} is parsed from the
  # Dockerfile). Container Builder overrides our workdir so we need
  # to add this step to set it back.
  command = (f'rm -r /out && cd /src && cd {project.workdir} && '
             f'mkdir -p {build.out} && compile || '
             f'(echo "{failure_msg}" && false)')

  step = {
      'name': project.image,
      'env': env,
      'args': ['bash', '-c', command],
      'id': get_id('compile', build),
  }
  if parallel:
    maybe_add_parallel(step, build_lib.get_srcmap_step_id(), parallel)
  return step


def maybe_add_parallel(step, wait_for_id, parallel):
  """If |parallel|, sets |step|'s 'waitFor' to |wait_for_id| so the step can
  run as soon as that step finishes. Otherwise does nothing. Mutates |step|
  and returns None."""
  if parallel:
    step['waitFor'] = wait_for_id


def get_id(step_type, build):
  """Returns a step id of the form
  '<step_type>-<engine>-<sanitizer>-<architecture>', which is unique per
  |step_type| and |build|. Useful for parallelizing builds."""
  return '{}-{}-{}-{}'.format(step_type, build.fuzzing_engine, build.sanitizer,
                              build.architecture)


def get_build_steps(  # pylint: disable=too-many-locals, too-many-statements, too-many-branches, too-many-arguments
    project_name, project_yaml_contents, dockerfile, image_project,
    base_images_project, config):
  """Returns build steps for project.

  Args:
    project_name: Name of the OSS-Fuzz project.
    project_yaml_contents: Text of the project's project.yaml.
    dockerfile: Text of the project's Dockerfile.
    image_project: GCR project hosting the project's docker image.
    base_images_project: GCR project hosting the base-runner and uploader
      images.
    config: A Config namedtuple (testing, test_image_suffix, branch,
      parallel).

  Returns:
    A list of GCB build step dicts, or an empty list if the project is
    disabled. The list starts with the steps from
    build_lib.project_image_steps(), followed by one group of steps for each
    supported (engine, sanitizer, architecture) combination: compile,
    optional build check, optional label writing, optional dataflow post build
    steps, targets list generation, and the upload/cleanup steps.
  """

  project = Project(project_name, project_yaml_contents, dockerfile,
                    image_project)

  if project.disabled:
    logging.info('Project "%s" is disabled.', project.name)
    return []

  # One minute-resolution timestamp is shared by every upload of this build.
  timestamp = get_datetime_now().strftime('%Y%m%d%H%M')

  build_steps = build_lib.project_image_steps(
      project.name,
      project.image,
      project.fuzzing_language,
      branch=config.branch,
      test_image_suffix=config.test_image_suffix)

  # Sort engines to make AFL first to test if libFuzzer has an advantage in
  # finding bugs first since it is generally built first.
  for fuzzing_engine in sorted(project.fuzzing_engines):
    for sanitizer in project.sanitizers:
      for architecture in project.architectures:
        build = Build(fuzzing_engine, sanitizer, architecture)
        if not is_supported_configuration(build):
          continue

        env = get_env(project.fuzzing_language, build)
        compile_step = get_compile_step(project, build, env, config.parallel)
        build_steps.append(compile_step)

        if project.run_tests:
          failure_msg = (
              '*' * 80 + '\nBuild checks failed.\n'
              'To reproduce, run:\n'
              f'python infra/helper.py build_image {project.name}\n'
              'python infra/helper.py build_fuzzers --sanitizer '
              f'{build.sanitizer} --engine {build.fuzzing_engine} '
              f'--architecture {build.architecture} {project.name}\n'
              'python infra/helper.py check_build --sanitizer '
              f'{build.sanitizer} --engine {build.fuzzing_engine} '
              f'--architecture {build.architecture} {project.name}\n' +
              '*' * 80)
          # Test fuzz targets.
          test_step = {
              'name':
                  get_runner_image_name(base_images_project,
                                        config.test_image_suffix),
              'env':
                  env,
              'args': [
                  'bash', '-c',
                  f'test_all.py || (echo "{failure_msg}" && false)'
              ],
              'id':
                  get_id('build-check', build)
          }
          # The last step at this point is the compile step appended above, so
          # in parallel mode the build check waits only for its own compile.
          maybe_add_parallel(test_step, get_last_step_id(build_steps),
                             config.parallel)
          build_steps.append(test_step)

        if project.labels:
          # Write target labels.
          build_steps.append({
              'name':
                  project.image,
              'env':
                  env,
              'args': [
                  '/usr/local/bin/write_labels.py',
                  json.dumps(project.labels),
                  build.out,
              ],
          })

        # Dataflow builds get extra post build steps, unless
        # dataflow_post_build_steps() returns nothing.
        if build.sanitizer == 'dataflow' and build.fuzzing_engine == 'dataflow':
          dataflow_steps = dataflow_post_build_steps(project.name, env,
                                                     base_images_project,
                                                     config.testing,
                                                     config.test_image_suffix)
          if dataflow_steps:
            build_steps.extend(dataflow_steps)
          else:
            sys.stderr.write('Skipping dataflow post build steps.\n')

        build_steps.extend([
            # Generate targets list.
            {
                'name':
                    get_runner_image_name(base_images_project,
                                          config.test_image_suffix),
                'env':
                    env,
                'args': [
                    'bash', '-c',
                    f'targets_list > /workspace/{build.targets_list_filename}'
                ],
            }
        ])
        # Zip, upload and clean up this configuration's output.
        upload_steps = get_upload_steps(project, build, timestamp,
                                        base_images_project, config.testing)
        build_steps.extend(upload_steps)

  return build_steps


def get_targets_list_upload_step(bucket, project, build, uploader_image):
  """Returns the GCB step that uploads the targets list generated for |build|
  of |project| to |bucket|, using |uploader_image|."""
  unsigned_url = build_lib.get_targets_list_url(bucket, project.name,
                                                build.sanitizer)
  signed_url = build_lib.get_signed_url(unsigned_url)
  local_path = f'/workspace/{build.targets_list_filename}'
  return {
      'name': uploader_image,
      'args': [local_path, signed_url],
  }


def get_uploader_image(base_images_project):
  """Returns the name of the uploader image hosted in
  |base_images_project|."""
  return 'gcr.io/{}/uploader'.format(base_images_project)


def get_upload_steps(project, build, timestamp, base_images_project, testing):
  """Returns the steps for uploading the fuzzer build specified by |project| and
  |build|.

  Args:
    project: The Project being built.
    build: The Build (engine/sanitizer/architecture) whose output is uploaded.
    timestamp: String used to name the uploaded archive and srcmap.
    base_images_project: GCR project that hosts the uploader image.
    testing: Passed to build_lib.get_upload_bucket() to select the bucket.

  Returns:
    A list of GCB steps that, in order: zip the build output, upload the
    srcmap, upload the zip, upload the targets list, upload the latest.version
    file (whose content is the zip's file name) and remove the build's output
    directory.
  """
  bucket = build_lib.get_upload_bucket(build.fuzzing_engine, build.architecture,
                                       testing)
  stamped_name = '-'.join([project.name, build.sanitizer, timestamp])
  zip_file = stamped_name + '.zip'
  upload_url = build_lib.get_signed_url(
      build_lib.GCS_UPLOAD_URL_FORMAT.format(bucket, project.name, zip_file))
  stamped_srcmap_file = stamped_name + '.srcmap.json'
  srcmap_url = build_lib.get_signed_url(
      build_lib.GCS_UPLOAD_URL_FORMAT.format(bucket, project.name,
                                             stamped_srcmap_file))
  latest_version_file = '-'.join(
      [project.name, build.sanitizer, LATEST_VERSION_FILENAME])
  latest_version_url = build_lib.GCS_UPLOAD_URL_FORMAT.format(
      bucket, project.name, latest_version_file)
  latest_version_url = build_lib.get_signed_url(
      latest_version_url, content_type=LATEST_VERSION_CONTENT_TYPE)
  uploader_image = get_uploader_image(base_images_project)

  upload_steps = [
      # Zip binaries.
      {
          'name': project.image,
          'args': ['bash', '-c', f'cd {build.out} && zip -r {zip_file} *'],
      },
      # Upload srcmap.
      {
          'name': uploader_image,
          'args': [
              '/workspace/srcmap.json',
              srcmap_url,
          ],
      },
      # Upload binaries.
      {
          'name': uploader_image,
          'args': [
              # This path is resolved inside the build container, so it must
              # be joined with POSIX separators whatever OS generates the
              # steps. os.path.join would use the host's separator.
              posixpath.join(build.out, zip_file),
              upload_url,
          ],
      },
      # Upload targets list.
      get_targets_list_upload_step(bucket, project, build, uploader_image),
      # Upload the latest.version file.
      build_lib.http_upload_step(zip_file, latest_version_url,
                                 LATEST_VERSION_CONTENT_TYPE),
      # Cleanup.
      get_cleanup_step(project, build),
  ]
  return upload_steps


def get_cleanup_step(project, build):
  """Returns the GCB step that deletes |build|'s output directory, run in
  |project|'s image."""
  remove_command = 'rm -r ' + build.out
  return {
      'name': project.image,
      'args': ['bash', '-c', remove_command],
  }


def get_runner_image_name(base_images_project, test_image_suffix):
  """Returns the base-runner image in |base_images_project|. If
  |test_image_suffix| is non-empty, '-<test_image_suffix>' is appended to the
  image name."""
  runner_image = f'gcr.io/{base_images_project}/base-runner'
  if not test_image_suffix:
    return runner_image
  return runner_image + '-' + test_image_suffix


def dataflow_post_build_steps(project_name, env, base_images_project, testing,
                              test_image_suffix):
  """Returns the dataflow post build steps for |project_name|: the corpus
  download steps from build_lib followed by a step that unzips the corpora and
  runs collect_dft. Returns None if there are no corpus download steps."""
  steps = build_lib.download_corpora_steps(project_name, testing)
  if not steps:
    return None

  runner_image = get_runner_image_name(base_images_project, test_image_suffix)
  # Extend the regular build environment with the DFT collection settings.
  dft_env = env + [
      'COLLECT_DFT_TIMEOUT=2h',
      'DFT_FILE_SIZE_LIMIT=65535',
      'DFT_MIN_TIMEOUT=2.0',
      'DFT_TIMEOUT_RANGE=6.0',
  ]
  collect_command = (
      'for f in /corpus/*.zip; do unzip -q $f -d ${f%%.*}; done && '
      'collect_dft || (echo "DFT collection failed." && false)')
  corpus_volume = {'name': 'corpus', 'path': '/corpus'}

  steps.append({
      'name': runner_image,
      'env': dft_env,
      'args': ['bash', '-c', collect_command],
      'volumes': [corpus_volume],
  })
  return steps


def get_logs_url(build_id, cloud_project='oss-fuzz'):
  """Returns the Cloud Console logs viewer URL for build |build_id| in
  |cloud_project|."""
  viewer = 'https://console.cloud.google.com/logs/viewer'
  query = f'resource=build%2Fbuild_id%2F{build_id}&project={cloud_project}'
  return viewer + '?' + query


def get_gcb_url(build_id, cloud_project='oss-fuzz'):
  """Returns the Cloud Console URL of the Cloud Build page for build
  |build_id| in |cloud_project|."""
  builds_page = 'https://console.cloud.google.com/cloud-build/builds/'
  return builds_page + f'{build_id}?project={cloud_project}'


# pylint: disable=no-member
def run_build(oss_fuzz_project,
              build_steps,
              credentials,
              build_type,
              cloud_project='oss-fuzz'):
  """Submits |build_steps| as a single build to Cloud Build in
  |cloud_project|, authenticating with |credentials|. The build is tagged with
  |oss_fuzz_project| and |build_type| so it can be queried for debugging
  purposes. Build options are read as YAML from the GCB_OPTIONS environment
  variable when it is set, and default to DEFAULT_GCB_OPTIONS otherwise.
  Returns the id of the created build."""
  raw_options = os.environ.get('GCB_OPTIONS')
  if raw_options is None:
    options = DEFAULT_GCB_OPTIONS
  else:
    options = yaml.safe_load(raw_options)

  combined_tag = oss_fuzz_project + '-' + build_type
  build_body = {
      'steps': build_steps,
      'timeout': str(build_lib.BUILD_TIMEOUT) + 's',
      'options': options,
      'logsBucket': GCB_LOGS_BUCKET,
      'tags': [combined_tag, build_type, oss_fuzz_project],
      'queueTtl': str(QUEUE_TTL_SECONDS) + 's',
  }

  cloudbuild = cloud_build('cloudbuild',
                           'v1',
                           credentials=credentials,
                           cache_discovery=False)
  create_request = cloudbuild.projects().builds().create(
      projectId=cloud_project, body=build_body)
  build_info = create_request.execute()
  build_id = build_info['metadata']['build']['id']

  logging.info('Build ID: %s', build_id)
  logging.info('Logs: %s', get_logs_url(build_id, cloud_project))
  logging.info('Cloud build page: %s', get_gcb_url(build_id, cloud_project))
  return build_id


def get_args(description):
  """Parses the command line (sys.argv) of a build script and returns the
  resulting argparse namespace. |description| is shown in the help text."""
  arg_parser = argparse.ArgumentParser(sys.argv[0], description=description)
  arg_parser.add_argument('projects', nargs='+', help='Projects.')
  # The flags are optional and off by default: store_true defaults to False.
  arg_parser.add_argument('--testing',
                          action='store_true',
                          help='Upload to testing buckets.')
  arg_parser.add_argument('--test-image-suffix',
                          help='Use testing base-images.')
  arg_parser.add_argument('--branch', help='Use specified OSS-Fuzz branch.')
  arg_parser.add_argument('--parallel',
                          action='store_true',
                          help='Do builds in parallel.')
  parsed_args = arg_parser.parse_args()
  return parsed_args


def build_script_main(script_description, get_build_steps_func, build_type):
  """Entry point shared by build scripts.

  Parses the command line, using |script_description| as the help
  description. For each requested project, gets its build steps with
  |get_build_steps_func| and runs them on GCB, tagging the build with
  |build_type|. A project whose data cannot be read, or for which no steps are
  produced, is skipped. Returns 0 if every project was submitted and 1 if any
  was skipped."""
  args = get_args(script_description)
  logging.basicConfig(level=logging.INFO)

  image_project = 'oss-fuzz'
  base_images_project = 'oss-fuzz-base'

  credentials = oauth2client.client.GoogleCredentials.get_application_default()
  config = Config(testing=args.testing,
                  test_image_suffix=args.test_image_suffix,
                  branch=args.branch,
                  parallel=args.parallel)

  exit_code = 0
  for project_name in args.projects:
    logging.info('Getting steps for: "%s".', project_name)
    try:
      project_data = get_project_data(project_name)
    except FileNotFoundError:
      logging.error('Couldn\'t get project data. Skipping %s.', project_name)
      exit_code = 1
      continue
    project_yaml_contents, dockerfile_contents = project_data

    steps = get_build_steps_func(project_name, project_yaml_contents,
                                 dockerfile_contents, image_project,
                                 base_images_project, config)
    if steps:
      run_build(project_name, steps, credentials, build_type)
    else:
      logging.error('No steps. Skipping %s.', project_name)
      exit_code = 1

  return exit_code


def main():
  """Builds the projects named on the command line as fuzzing builds on GCB.
  Returns the process exit code from build_script_main()."""
  description = 'Builds a project on GCB.'
  return build_script_main(description, get_build_steps, FUZZING_BUILD_TYPE)


# Run main() only when executed as a script, and use its return value as the
# process exit code.
if __name__ == '__main__':
  sys.exit(main())