aboutsummaryrefslogtreecommitdiff
path: root/projects
diff options
context:
space:
mode:
authorGoogle AutoFuzz Team <security-tps@google.com>2020-12-16 17:51:44 +0100
committerGitHub <noreply@github.com>2020-12-16 08:51:44 -0800
commit22704c168eb51dbf4c8f9ef64717eb61fc254957 (patch)
tree0335ae51c1e629a443370015fe7c68db2f00a87e /projects
parent724f5abdd219a4e0cf9ee6e58cb9da42702832e8 (diff)
downloadoss-fuzz-22704c168eb51dbf4c8f9ef64717eb61fc254957.tar.gz
Add BeautifulSoup (#4821)
* Add BeautifulSoup BeautifulSoup is the standard to parse untrusted/invalid/weird html in Python, so fuzzing it for unexpected exceptions and infinite loops makes sense. Moreover, it's using various parsers, with some of them written in C. * Update the building script * ValueError is also a valid exception
Diffstat (limited to 'projects')
-rw-r--r--projects/bs4/Dockerfile27
-rw-r--r--projects/bs4/bs4_fuzzer.py66
-rw-r--r--projects/bs4/build.sh33
-rw-r--r--projects/bs4/project.yaml12
4 files changed, 138 insertions, 0 deletions
diff --git a/projects/bs4/Dockerfile b/projects/bs4/Dockerfile
new file mode 100644
index 000000000..dc5c96966
--- /dev/null
+++ b/projects/bs4/Dockerfile
@@ -0,0 +1,27 @@
+# Copyright 2019 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+FROM gcr.io/oss-fuzz-base/base-builder
+
+# Refresh the package index in the same layer as the install so that a stale
+# index baked into the base image is never used to resolve packages. apt-get
+# is used rather than apt because apt's command-line interface is meant for
+# interactive use and is not guaranteed to be stable in scripts.
+RUN apt-get update && apt-get install -y bzr python-lxml python-html5lib
+# Python 3 packages used by the converted sources and by the optional parsers
+# that the fuzzer selects (html5lib, lxml).
+RUN pip3 install 2to3 soupsieve html5lib lxml
+# Fetch the BeautifulSoup sources from Launchpad.
+RUN bzr branch lp:beautifulsoup
+WORKDIR beautifulsoup
+# Beautifulsoup is written in python2, with a script to automatically convert it to python3.
+# `yes` answers any confirmation prompt the conversion script may print.
+RUN yes | ./convert-py3k
+# The remaining build steps run from the py3k directory.
+WORKDIR py3k
+
+COPY build.sh bs4_fuzzer.py $SRC/
diff --git a/projects/bs4/bs4_fuzzer.py b/projects/bs4/bs4_fuzzer.py
new file mode 100644
index 000000000..119426174
--- /dev/null
+++ b/projects/bs4/bs4_fuzzer.py
@@ -0,0 +1,66 @@
+#!/usr/bin/python3
+
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import sys
+import warnings
+import atheris
+
+from bs4 import BeautifulSoup
+
+
+try:
+ import HTMLParser
+ HTMLParseError = HTMLParser.HTMLParseError
+except ImportError:
+ # HTMLParseError is removed in Python 3.5. Since it can never be
+ # thrown in 3.5, we can just define our own class as a placeholder.
+
+ class HTMLParseError(Exception):
+ pass
+
+
+def TestOneInput(data):
+ """TestOneInput gets random data from the fuzzer, and throws it at bs4."""
+ if len(data) < 1:
+ return
+
+ parsers = ['lxml-xml', 'html5lib', 'html.parser', 'lxml']
+ try:
+ idx = int(data[0]) % len(parsers)
+ except ValueError:
+ return
+
+ try:
+ soup = BeautifulSoup(data[1:], features=parsers[idx])
+ except HTMLParseError:
+ return
+ except ValueError:
+ return
+
+ list(soup.find_all(True))
+ soup.prettify()
+
+
+def main():
+ logging.disable(logging.CRITICAL)
+ warnings.filterwarnings('ignore')
+ atheris.Setup(sys.argv, TestOneInput, enable_python_coverage=True)
+ atheris.Fuzz()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/projects/bs4/build.sh b/projects/bs4/build.sh
new file mode 100644
index 000000000..111be4645
--- /dev/null
+++ b/projects/bs4/build.sh
@@ -0,0 +1,33 @@
+#!/bin/bash -eu
+# Copyright 2020 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+
+# Build fuzzers in $OUT.
+#
+# For every *_fuzzer.py found under $SRC this loop:
+#   1. freezes the script into a single-file executable named <fuzzer>.pkg
+#      in $OUT with pyinstaller, and
+#   2. writes a /bin/sh wrapper named <fuzzer> next to it that preloads
+#      sanitizer_with_fuzzer.so and extends ASAN_OPTIONS before running the
+#      frozen package with the wrapper's own arguments.
+#
+# find's output is read NUL-delimited on file descriptor 3, so paths that
+# contain whitespace or glob characters are not split or expanded, and the
+# commands in the loop body keep their normal stdin.
+while IFS= read -r -d '' fuzzer <&3; do
+  fuzzer_basename=$(basename -s .py "$fuzzer")
+  fuzzer_package=${fuzzer_basename}.pkg
+  pyinstaller --distpath "$OUT" --onefile --name "$fuzzer_package" "$fuzzer"
+
+  # Create execution wrapper.
+  # \$ defers expansion until the wrapper runs; $fuzzer_package is expanded
+  # now, while the wrapper is being written. Inside the wrapper the package
+  # path and "$@" are quoted so that an install directory or an argument
+  # containing spaces is passed through as a single word.
+  # The "LLVMFuzzerTestOneInput" comment line must stay in the wrapper: per
+  # its own text it exists for fuzzer detection.
+  cat > "$OUT/$fuzzer_basename" << EOF
+#!/bin/sh
+# LLVMFuzzerTestOneInput for fuzzer detection.
+this_dir=\$(dirname "\$0")
+LD_PRELOAD=\$this_dir/sanitizer_with_fuzzer.so \
+ASAN_OPTIONS=\$ASAN_OPTIONS:symbolize=1:external_symbolizer_path=\$this_dir/llvm-symbolizer:detect_leaks=0 \
+"\$this_dir/$fuzzer_package" "\$@"
+EOF
+  chmod u+x "$OUT/$fuzzer_basename"
+done 3< <(find "$SRC" -name '*_fuzzer.py' -print0)
diff --git a/projects/bs4/project.yaml b/projects/bs4/project.yaml
new file mode 100644
index 000000000..01ff45175
--- /dev/null
+++ b/projects/bs4/project.yaml
@@ -0,0 +1,12 @@
+# Project metadata for the bs4 (BeautifulSoup) fuzzing integration.
+homepage: "https://www.crummy.com/software/BeautifulSoup/"
+main_repo: "https://code.launchpad.net/~leonardr/beautifulsoup/bs4"
+# The fuzz target in this project is a Python script (bs4_fuzzer.py).
+language: python
+# Contact addresses associated with this project.
+primary_contact: "security-tps@google.com"
+auto_ccs:
+ - "jvoisin@google.com"
+ - "ipudney@google.com"
+# Fuzzing engines and sanitizers this project is configured for.
+fuzzing_engines:
+ - libfuzzer
+sanitizers:
+ - address
+ - undefined