aboutsummaryrefslogtreecommitdiff
path: root/infra/ci/build.py
blob: f71799bb2d2c6c8ba977cf1ec8011ac88bd817c0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
#!/usr/bin/env python
# Copyright 2019 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
"""Build modified projects."""

from __future__ import print_function

import enum
import os
import re
import sys
import subprocess
import yaml

# Name of the project that serves as the canary build: it is built when infra/
# was changed but no modified project was built (see main()).
CANARY_PROJECT = 'skcms'

# Fallback values used by should_build() when project.yaml does not contain the
# corresponding 'architectures', 'fuzzing_engines' or 'sanitizers' section.
DEFAULT_ARCHITECTURES = ['x86_64']
DEFAULT_ENGINES = ['afl', 'honggfuzz', 'libfuzzer']
DEFAULT_SANITIZERS = ['address', 'undefined']

# Languages from project.yaml that have code coverage support. Consulted by
# should_build_coverage() to decide whether a coverage build is attempted.
LANGUAGES_WITH_COVERAGE_SUPPORT = ['c', 'c++', 'go']


def get_changed_files_output():
  """Asks git for the names of the files changed on this branch.

  The merge base of FETCH_HEAD and origin/HEAD is looked up first. The raw,
  decoded output of `git diff --name-only <merge-base>..` is then returned.

  Raises:
    subprocess.CalledProcessError: if either git invocation exits with a
      non-zero status.
  """
  merge_base_command = ['git', 'merge-base', 'FETCH_HEAD', 'origin/HEAD']
  merge_base = subprocess.check_output(merge_base_command).strip().decode()

  diff_command = ['git', 'diff', '--name-only', merge_base + '..']
  changed_files = subprocess.check_output(diff_command)
  return changed_files.decode()


def get_modified_buildable_projects():
  """Returns the projects modified in this change that have a Dockerfile.

  Project names are extracted from the changed paths reported by
  get_changed_files_output(). A project is the directory directly below the
  top-level projects/ directory, so a change to projects/foo/sub/file.c counts
  as a change to project "foo".

  Returns:
    A list of project names in sorted order. Projects whose directory does not
    contain a Dockerfile are reported on stdout and left out.
  """
  git_output = get_changed_files_output()
  # `git diff --name-only` reports paths relative to the repository root by
  # default, so the pattern is anchored at the start of each line. Only the
  # first path component after projects/ is captured: a greedy `.*` would
  # swallow nested directories (yielding "foo/sub" instead of "foo"), which
  # made the Dockerfile check below fail and the project be skipped.
  projects_regex = r'^projects/(?P<name>[^/\n]+)/'
  modified_projects = set(
      re.findall(projects_regex, git_output, flags=re.MULTILINE))
  projects_dir = os.path.join(get_oss_fuzz_root(), 'projects')
  # Filter out projects without Dockerfile files since new projects and reverted
  # projects frequently don't have them. In these cases we don't want the CI
  # build to fail.
  modified_buildable_projects = []
  # Sort so that the build order does not depend on set iteration order.
  for project in sorted(modified_projects):
    if not os.path.exists(os.path.join(projects_dir, project, 'Dockerfile')):
      print('Project {0} does not have Dockerfile. skipping build.'.format(
          project))
      continue
    modified_buildable_projects.append(project)
  return modified_buildable_projects


def get_oss_fuzz_root():
  """Returns the absolute path of the oss-fuzz checkout's root directory.

  The root is the directory three levels above this script's resolved
  (symlink-free) location.
  """
  path = os.path.realpath(__file__)
  # Walk up from the script file: its directory, then two parents above that.
  for _ in range(3):
    path = os.path.dirname(path)
  return os.path.abspath(path)


def execute_helper_command(helper_command):
  """Runs infra/helper.py with |helper_command| as its arguments.

  The complete command line is printed before it is executed.

  Args:
    helper_command: list of argument strings to pass to helper.py.

  Raises:
    subprocess.CalledProcessError: if helper.py exits with a non-zero status.
  """
  helper_path = os.path.join(get_oss_fuzz_root(), 'infra', 'helper.py')
  full_command = ['python', helper_path] + helper_command
  print('Running command: %s' % ' '.join(full_command))
  subprocess.check_call(full_command)


def build_fuzzers(project, engine, sanitizer, architecture):
  """Runs helper.py's build_fuzzers command for |project|.

  Args:
    project: name of the project to build.
    engine: value passed to helper.py as --engine.
    sanitizer: value passed to helper.py as --sanitizer.
    architecture: value passed to helper.py as --architecture.

  Raises:
    subprocess.CalledProcessError: if the helper command fails.
  """
  arguments = ['build_fuzzers', project]
  arguments += ['--engine', engine]
  arguments += ['--sanitizer', sanitizer]
  arguments += ['--architecture', architecture]
  execute_helper_command(arguments)


def check_build(project, engine, sanitizer, architecture):
  """Runs helper.py's check_build command for |project|.

  The project is assumed to have been most recently built with the same
  |engine|, |sanitizer| and |architecture|.

  Args:
    project: name of the project to check.
    engine: value passed to helper.py as --engine.
    sanitizer: value passed to helper.py as --sanitizer.
    architecture: value passed to helper.py as --architecture.

  Raises:
    subprocess.CalledProcessError: if the helper command fails.
  """
  arguments = ['check_build', project]
  arguments += ['--engine', engine]
  arguments += ['--sanitizer', sanitizer]
  arguments += ['--architecture', architecture]
  execute_helper_command(arguments)


def should_build_coverage(project_yaml):
  """Decides from project.yaml contents whether to do a coverage build.

  Args:
    project_yaml: the parsed project.yaml mapping.

  Returns:
    False if the project declares the "none" fuzzing engine or if its language
    is not in LANGUAGES_WITH_COVERAGE_SUPPORT, True otherwise.

  Raises:
    AssertionError: if "none" is listed together with other fuzzing engines.
  """
  # Only projects that use a fuzzing engine get coverage builds.
  engines = project_yaml.get('fuzzing_engines', DEFAULT_ENGINES)
  if 'none' in engines:
    assert len(engines) == 1, (
        'Forbidden to specify multiple engines for '
        '"fuzzing_engines" if "none" is specified.')
    return False

  language = project_yaml.get('language')
  if language in LANGUAGES_WITH_COVERAGE_SUPPORT:
    return True

  unsupported_message = ('Project is written in "{language}", '
                         'coverage is not supported yet.')
  print(unsupported_message.format(language=language))
  return False


def should_build(project_yaml):
  """Returns True if the build selected by the environment is enabled.

  The build is selected through the ENGINE, SANITIZER and ARCHITECTURE
  environment variables. Each value must be listed in the matching project.yaml
  section; a section missing from |project_yaml| falls back to the module-level
  defaults. Coverage builds (SANITIZER == 'coverage') are decided by
  should_build_coverage() instead.

  Args:
    project_yaml: the parsed project.yaml mapping.
  """
  if os.getenv('SANITIZER') == 'coverage':
    # This assumes we only do coverage builds with libFuzzer on x86_64.
    return should_build_coverage(project_yaml)

  # (environment variable, project.yaml section, fallback values).
  checks = (
      ('ENGINE', 'fuzzing_engines', DEFAULT_ENGINES),
      ('SANITIZER', 'sanitizers', DEFAULT_SANITIZERS),
      ('ARCHITECTURE', 'architectures', DEFAULT_ARCHITECTURES),
  )
  for env_var, yaml_name, defaults in checks:
    if os.getenv(env_var) not in project_yaml.get(yaml_name, defaults):
      return False
  return True


def build_project(project):
  """Builds |project| as selected by the SANITIZER, ENGINE and ARCHITECTURE
  environment variables.

  The project's project.yaml is read first. Nothing is built when the project
  is marked as disabled or when should_build() rejects the selected
  configuration. After a successful build, check_build is run as well unless
  the engine is 'none' or the sanitizer is 'coverage'.

  Args:
    project: name of the project directory under projects/.

  Raises:
    subprocess.CalledProcessError: if a helper.py command fails.
  """
  yaml_path = os.path.join(get_oss_fuzz_root(), 'projects', project,
                           'project.yaml')
  with open(yaml_path) as yaml_file:
    project_config = yaml.safe_load(yaml_file)

  if project_config.get('disabled', False):
    print('Project {0} is disabled, skipping build.'.format(project))
    return

  engine, sanitizer, architecture = (
      os.getenv(name) for name in ('ENGINE', 'SANITIZER', 'ARCHITECTURE'))

  if not should_build(project_config):
    skip_message = (
        'Specified build: engine: {0}, sanitizer: {1}, architecture: {2} '
        'not enabled for this project: {3}. Skipping build.')
    print(skip_message.format(engine, sanitizer, architecture, project))
    return

  print('Building project', project)
  build_fuzzers(project, engine, sanitizer, architecture)

  # check_build is skipped for engine-less and coverage builds.
  if engine == 'none' or sanitizer == 'coverage':
    return
  check_build(project, engine, sanitizer, architecture)


@enum.unique
class BuildModifiedProjectsResult(enum.Enum):
  """Possible outcomes reported by build_modified_projects()."""
  # No build was attempted.
  NONE_BUILT = 0
  # Every attempted build succeeded.
  BUILD_SUCCESS = 1
  # At least one attempted build failed.
  BUILD_FAIL = 2


def build_modified_projects():
  """Builds every modified, buildable project.

  Returns:
    BuildModifiedProjectsResult.NONE_BUILT if there are no modified buildable
    projects, BuildModifiedProjectsResult.BUILD_FAIL if building any of them
    raised subprocess.CalledProcessError (the failed names are printed), and
    BuildModifiedProjectsResult.BUILD_SUCCESS otherwise.
  """
  projects = get_modified_buildable_projects()
  if not projects:
    return BuildModifiedProjectsResult.NONE_BUILT

  # Keep going after a failure so that every failing project gets reported.
  failures = []
  for name in projects:
    try:
      build_project(name)
    except subprocess.CalledProcessError:
      failures.append(name)

  if not failures:
    return BuildModifiedProjectsResult.BUILD_SUCCESS

  print('Failed projects:', ' '.join(failures))
  return BuildModifiedProjectsResult.BUILD_FAIL


def is_infra_changed():
  """Returns True if any changed path reported by git contains "infra/"."""
  changed_files = get_changed_files_output()
  return bool(re.search('.*infra/.*\n', changed_files))


def build_base_images():
  """Pulls the images and then builds the base images.

  Returns:
    0 if every image was built, 1 as soon as building one of them fails.

  Raises:
    subprocess.CalledProcessError: if the initial pull_images command fails.
  """
  # TODO(jonathanmetzman): Investigate why caching fails so often and
  # when we improve it, build base-clang as well. Also, move this function
  # to a helper command when we can support base-clang.
  execute_helper_command(['pull_images'])
  try:
    for image in ('base-image', 'base-builder', 'base-runner'):
      execute_helper_command(['build_image', image, '--no-pull'])
  except subprocess.CalledProcessError:
    # Stop at the first image that fails to build.
    return 1
  return 0


def build_canary_project():
  """Builds the canary project to verify that infra/ changes don't break
  things.

  The project built is the one named by the module-level CANARY_PROJECT
  constant.

  Returns:
    False if the build was attempted but failed with
    subprocess.CalledProcessError, True otherwise.
  """
  try:
    # Use the shared constant rather than repeating the project name here.
    build_project(CANARY_PROJECT)
  except subprocess.CalledProcessError:
    return False

  return True


def main():
  """Builds the modified projects, or the canary project if only infra changed.

  When infra/ was changed, the base images are pulled and built first.

  Returns:
    The process exit status: 1 if building the base images, a modified project
    or the canary project failed, 0 otherwise.
  """
  infra_changed = is_infra_changed()
  if infra_changed:
    print('Pulling and building base images first.')
    if build_base_images():
      return 1

  result = build_modified_projects()
  if result == BuildModifiedProjectsResult.BUILD_FAIL:
    return 1

  # It's unnecessary to build the canary if we've built any projects already,
  # and it is only of interest when infra/ was touched.
  if result != BuildModifiedProjectsResult.NONE_BUILT or not infra_changed:
    return 0

  return 0 if build_canary_project() else 1


# When executed as a script, run main() and use its return value as the process
# exit status.
if __name__ == '__main__':
  sys.exit(main())