Cleanup build script, fix tensorflow build (#3952)

* Need to manually install numpy now as it is used in the toolchain
* Handle review
* Remove bazel install since we install bazelisk
* Force symlink python3 -> python
* Refactor build script
* Remove `-stdlib=libc++` as that causes link errors
* Remove `identity_fuzz` as it results in a huge fuzzer. Will work on reducing its size and then re-enable it
* Copy fuzzer to `$OUT`, not move
* Handle coverage support
author: Mihai Maruseac <mihaimaruseac@google.com> 2020-06-15 16:38:48 +0000
committer: GitHub <noreply@github.com> 2020-06-15 09:38:48 -0700
commit: 7a059f4355560223264645ebd3f5af8205b98537 (patch)
tree: 2f03920c3605416fccc2715c5479c21039fe3566 /projects/tensorflow
parent: c06528180b3893199212d17a640f0037f6f40088 (diff)
download: oss-fuzz-7a059f4355560223264645ebd3f5af8205b98537.tar.gz
2 files changed, 57 insertions, 107 deletions
diff --git a/projects/tensorflow/Dockerfile b/projects/tensorflow/Dockerfile
index ab4bae1e3..6230126bc 100644
--- a/projects/tensorflow/Dockerfile
+++ b/projects/tensorflow/Dockerfile
@@ -25,10 +25,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     rm -rf /var/lib/apt/lists/*
 RUN python3 -m pip install numpy
 
-# Install Bazel from apt-get to ensure dependencies are there
-RUN echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list
-RUN curl https://bazel.build/bazel-release.pub.gpg | apt-key add -
-RUN apt-get update && apt-get install -y bazel
+# Due to a Bazel bug, we need to make `python` a symlink to `python3`
+# See https://github.com/bazelbuild/bazel/issues/8665
+RUN ln -s /usr/local/bin/python3 /usr/local/bin/python
+
+# Install Bazelisk to keep bazel in sync with the version required by TensorFlow
 RUN curl -Lo /usr/bin/bazel \
         https://github.com/bazelbuild/bazelisk/releases/download/v1.1.0/bazelisk-linux-amd64 \
         && \
diff --git a/projects/tensorflow/build.sh b/projects/tensorflow/build.sh
index 777821ecd..478b0fbb9 100755
--- a/projects/tensorflow/build.sh
+++ b/projects/tensorflow/build.sh
@@ -15,129 +15,78 @@
 #
 ################################################################################
 
-# Generate the list of fuzzers we have (only the base/op name).
-FUZZING_BUILD_FILE="tensorflow/core/kernels/fuzzing/BUILD"
-declare -r FUZZERS=$(
-  grep '^tf_ops_fuzz_target' ${FUZZING_BUILD_FILE} | cut -d'"' -f2 | head -n5
-)
-
-# Add a few more flags to make sure fuzzers build and run successfully.
-# Note the c++11/libc++ flags to build using the same toolchain as the one used
-# to build libFuzzingEngine.
-CFLAGS="${CFLAGS} -fno-sanitize=vptr"
-CXXFLAGS="${CXXFLAGS} -fno-sanitize=vptr"
-
-# Force Python3 and install required python deps
+# Force Python3, run configure.py to pick the right build config
 PYTHON=python3
-
-# Make sure we run ./configure to detect when we are using a Bazel out of range
 yes "" | ${PYTHON} configure.py
 
-# Due to statically linking boringssl dependency, we have to define one extra
-# flag when compiling for memory fuzzing (see the boringssl project).
-if [ "$SANITIZER" = "memory" ]
-then
-  CFLAGS="${CFLAGS} -DOPENSSL_NO_ASM=1"
-  CXXFLAGS="${CXXFLAGS} -DOPENSSL_NO_ASM=1"
-fi
-
-# All of the flags in $CFLAGS and $CXXFLAGS need to be passed to bazel too.
-# Also, pass in flags to ensure static build and to help in debugging failures.
+# Since Bazel passes flags to compilers via `--copt`, `--conlyopt` and
+# `--cxxopt`, we need to move all flags from `$CFLAGS` and `$CXXFLAGS` to these.
+# We don't use `--copt` for these, as the warnings C compilers issue when they
+# encounter a C++-only option result in errors during the build.
+#
+# Note: Make sure that by this line `$CFLAGS` and `$CXXFLAGS` are properly set
+# up as further changes to them won't be visible to Bazel.
+#
+# Note: We remove the `-stdlib=libc++` flag as Bazel produces linker errors if
+# it is present.
 declare -r EXTRA_FLAGS="\
---config=monolithic --dynamic_mode=off \
---verbose_failures \
 $(
 for f in ${CFLAGS}; do
   echo "--conlyopt=${f}" "--linkopt=${f}"
 done
 for f in ${CXXFLAGS}; do
-  echo "--cxxopt=${f}" "--linkopt=${f}"
+  if [[ "$f" != "-stdlib=libc++" ]]; then
+    echo "--cxxopt=${f}" "--linkopt=${f}"
+  fi
 done
 )"
 
-# We need a new bazel function to build the actual binary.
-cat >> tensorflow/core/kernels/fuzzing/tf_ops_fuzz_target_lib.bzl << END
-
-def cc_tf(name):
-    native.cc_test(
-        name = name + "_fuzz",
-        deps = [
-            "//tensorflow/core/kernels/fuzzing:fuzz_session",
-            "//tensorflow/core/kernels/fuzzing:" + name + "_fuzz_lib",
-            "//tensorflow/cc:cc_ops",
-            "//tensorflow/cc:scope",
-            "//tensorflow/core:core_cpu",
-        ],
-    )
-END
-
-# Import this function in the proper BUILD file.
-cat >> ${FUZZING_BUILD_FILE} << END
-
-load("//tensorflow/core/kernels/fuzzing:tf_ops_fuzz_target_lib.bzl", "cc_tf")
-
-END
-
-# And invoke it for all fuzzers.
-for fuzzer in ${FUZZERS}; do
-  echo cc_tf\(\"${fuzzer}\"\) >> ${FUZZING_BUILD_FILE}
+# Determine all fuzz targets. To control what gets fuzzed with OSSFuzz, all
+# supported fuzzers are in `//tensorflow/security/fuzzing`.
+declare -r FUZZERS=$(bazel query 'tests(//tensorflow/security/fuzzing/...)' | grep -v identity)
+
+# Build the fuzzer targets.
+# Pass in `--verbose_failures` so it is easy to debug compile crashes.
+# Pass in `--strip=never` to ensure coverage support.
+# Pass in `$LIB_FUZZING_ENGINE` to `--copt` and `--linkopt` to ensure we have a
+# `main` symbol defined (all these fuzzers build without a `main` and by default
+# `$CFLAGS` and `$CXXFLAGS` compile with `-fsanitize=fuzzer-no-link`).
+bazel build \
+  ${EXTRA_FLAGS} \
+  --verbose_failures \
+  --strip=never \
+  --copt=${LIB_FUZZING_ENGINE} \
+  --linkopt=${LIB_FUZZING_ENGINE} \
+  -- ${FUZZERS}
+
+# The fuzzers built above are in the `bazel-bin/` symlink. But they need to be
+# in `$OUT`, so copy them accordingly.
+for bazel_target in ${FUZZERS}; do
+  colon_index=$(expr index "${bazel_target}" ":")
+  fuzz_name="${bazel_target:$colon_index}"
+  bazel_location="bazel-bin/${bazel_target/:/\/}"
+  cp ${bazel_location} ${OUT}/$fuzz_name
 done
 
-# Since we force the environment, we expect bazel to fail during the linking of
-# each fuzzer. Hence, we will do the linking manually at the end of the process.
-# We just need to make sure we use the same invocation as bazel would use, so
-# use --verbose_failures (in ${EXTRA_FLAGS}) to get it and then encode it in the
-# following ${LINK_ARGS}.
-declare -r LINK_ARGS="\
--pthread -fuse-ld=gold \
--Wl,-no-as-needed -Wl,-z,relro,-z,now \
--B/usr/local/bin -B/usr/bin -Wl,--gc-sections \
-"
-
-# This should always look as successful despite linking error mentioned above.
-bazel build --jobs=2 ${EXTRA_FLAGS} -k //tensorflow/core/kernels/fuzzing:all || true
-
-# For each fuzzer target, we only have to link it manually to get the binary.
-for fuzzer in ${FUZZERS}; do
-  fz=${fuzzer}_fuzz
-
-  # Get the file with the parameters for linking or fail if it didn't exist.
-  lfile=`ls -1 bazel-bin/tensorflow/core/kernels/fuzzing/${fz}*.params | head -n1`
-
-  # Manually link everything.
-  ${CXX} ${CXXFLAGS} $LIB_FUZZING_ENGINE -o ${OUT}/${fz} ${LINK_ARGS} -Wl,@${lfile}
-done
-
-# For coverage, we need one extra step, see the envoy and grpc projects.
+# For coverage, we need to remap source files to correspond to the Bazel build
+# paths. We also need to resolve all symlinks that Bazel creates.
 if [ "$SANITIZER" = "coverage" ]
 then
-  declare -r REMAP_PATH=${OUT}/proc/self/cwd
+  declare -r RSYNC_CMD="rsync -avLkR"
+  declare -r REMAP_PATH=${OUT}/proc/self/cwd/
   mkdir -p ${REMAP_PATH}
-  rsync -ak ${SRC}/tensorflow/tensorflow ${REMAP_PATH}
-  rsync -ak ${SRC}/tensorflow/third_party ${REMAP_PATH}
 
-  # Also copy bazel generated files (via genrules)
-  declare -r BAZEL_PREFIX=bazel-out/k8-opt
-  declare -r REMAP_BAZEL_PATH=${REMAP_PATH}/${BAZEL_PREFIX}
-  mkdir -p ${REMAP_BAZEL_PATH}
-  rsync -ak ${SRC}/tensorflow/${BAZEL_PREFIX}/genfiles ${REMAP_BAZEL_PATH}
+  # Sync existing code.
+  ${RSYNC_CMD} tensorflow/ ${REMAP_PATH}
 
-  # Finally copy the external archives source files
-  rsync -ak ${SRC}/tensorflow/bazel-tensorflow/external ${REMAP_PATH}
-fi
+  # Sync generated proto files.
+  ${RSYNC_CMD} ./bazel-out/k8-opt/bin/tensorflow/core/protobuf ${REMAP_PATH}
 
-# Now that all is done, we just have to copy the existing corpora and
-# dictionaries to have them available in the runtime environment.
-# The tweaks to the filenames below are to make sure corpora/dictionary have
-# similar names as the fuzzer binary.
-for dict in tensorflow/core/kernels/fuzzing/dictionaries/*; do
-  name=$(basename -- $dict)
-  cp ${dict} ${OUT}/${name/.dict/_fuzz.dict}
-done
-for corpus in tensorflow/core/kernels/fuzzing/corpus/*; do
-  name=$(basename -- $corpus)
-  zip ${OUT}/${name}_fuzz_seed_corpus.zip ${corpus}/*
-done
+  # Sync external dependencies, without the `bazel-tensorflow` path prefix.
+  pushd bazel-tensorflow
+  ${RSYNC_CMD} external/ ${REMAP_PATH}
+  popd
+fi
 
 # Finally, make sure we don't accidentally run with stuff from the bazel cache.
 rm -f bazel-*
author	Mihai Maruseac <mihaimaruseac@google.com>	2020-06-15 16:38:48 +0000
committer	GitHub <noreply@github.com>	2020-06-15 09:38:48 -0700
commit	7a059f4355560223264645ebd3f5af8205b98537 (patch)
tree	2f03920c3605416fccc2715c5479c21039fe3566 /projects/tensorflow
parent	c06528180b3893199212d17a640f0037f6f40088 (diff)
download	oss-fuzz-7a059f4355560223264645ebd3f5af8205b98537.tar.gz