diff options
-rw-r--r-- | docs/getting-started/new-project-guide/python_lang.md | 33 | ||||
-rw-r--r-- | projects/ujson/Dockerfile | 2 | ||||
-rw-r--r-- | projects/ujson/hypothesis_structured_fuzzer.py | 72 |
3 files changed, 106 insertions, 1 deletions
diff --git a/docs/getting-started/new-project-guide/python_lang.md b/docs/getting-started/new-project-guide/python_lang.md index e6ee177d0..14208e300 100644 --- a/docs/getting-started/new-project-guide/python_lang.md +++ b/docs/getting-started/new-project-guide/python_lang.md @@ -32,7 +32,8 @@ docker images. ### Example project We recommend viewing [ujson](https://github.com/google/oss-fuzz/tree/master/projects/ujson) as an -example of a simple Python fuzzing project. +example of a simple Python fuzzing project, with both plain-Atheris and +Atheris + Hypothesis harnesses. ### project.yaml @@ -99,3 +100,33 @@ ASAN_OPTIONS=\$ASAN_OPTIONS:symbolize=1:external_symbolizer_path=\$this_dir/llvm chmod u+x $OUT/$fuzzer_basename done ``` + +## Hypothesis + +Using [Hypothesis](https://hypothesis.readthedocs.io/), the Python library for +[property-based testing](https://hypothesis.works/articles/what-is-property-based-testing/), +makes it really easy to generate complex inputs - whether in traditional test suites +or [by using test functions as fuzz harnesses](https://hypothesis.readthedocs.io/en/latest/details.html#use-with-external-fuzzers). + +> Property based testing is the construction of tests such that, when these tests are fuzzed, + failures in the test reveal problems with the system under test that could not have been + revealed by direct fuzzing of that system. + +You also get integrated test-case reduction for free - meaning that it's trivial to +report a canonical minimal example for each distinct failure discovered while fuzzing! + +See [here for the core "strategies"](https://hypothesis.readthedocs.io/en/latest/data.html) +for arbitrary data, [here for NumPy + Pandas support](https://hypothesis.readthedocs.io/en/latest/numpy.html), +or [here for a variety of third-party extensions](https://hypothesis.readthedocs.io/en/latest/strategies.html) +supporting everything from protobufs, to jsonschemas, to networkx graphs or geojson +or valid Python source code. 
+ +To use Hypothesis in OSS-Fuzz, install it in your Dockerfile with + +```shell +RUN pip3 install hypothesis +``` + +See [the `ujson` structured fuzzer](https://github.com/google/oss-fuzz/blob/master/projects/ujson/hypothesis_structured_fuzzer.py) +for an example "polyglot" which can either be run with `pytest` as a standard test function, +or run with OSS-Fuzz as a fuzz harness. diff --git a/projects/ujson/Dockerfile b/projects/ujson/Dockerfile index 562cbdc79..f044df681 100644 --- a/projects/ujson/Dockerfile +++ b/projects/ujson/Dockerfile @@ -16,6 +16,8 @@ FROM gcr.io/oss-fuzz-base/base-builder +RUN pip3 install hypothesis + RUN git clone \ --depth 1 \ --branch master \ diff --git a/projects/ujson/hypothesis_structured_fuzzer.py b/projects/ujson/hypothesis_structured_fuzzer.py new file mode 100644 index 000000000..c07a2cf5f --- /dev/null +++ b/projects/ujson/hypothesis_structured_fuzzer.py @@ -0,0 +1,72 @@ +#!/usr/bin/python3 + +# Copyright 2021 Zac Hatfield-Dodds +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This fuzzer is an example harness using Hypothesis for structured inputs. + +It would be possible, though more difficult, to write this test in terms +of Atheris' `FuzzedDataProvider` instead of Hypothesis strategies. + +As well as defining structured inputs however, the call to +`test_ujson_roundtrip()` will replay, deduplicate, and minimize any known +failing examples from previous runs - which is great when debugging. 
+Hypothesis uses a separate cache to Atheris/LibFuzzer seeds, so this is +strictly complementary to your traditional fuzzing workflow. + +For more details on Hypothesis, see: +https://hypothesis.readthedocs.io/en/latest/data.html +https://hypothesis.readthedocs.io/en/latest/details.html#use-with-external-fuzzers +""" + +import sys +import atheris +import ujson +from hypothesis import given, strategies as st + +# We could define all these inline within the call to @given(), +# but it's a bit easier to read if we name them here instead. +JSON_ATOMS = st.one_of( + st.none(), + st.booleans(), + st.integers(min_value=-(2 ** 63), max_value=2 ** 63 - 1), + st.floats(allow_nan=False, allow_infinity=False), + st.text(), +) +JSON_OBJECTS = st.recursive( + base=JSON_ATOMS, + extend=lambda inner: st.lists(inner) | st.dictionaries(st.text(), inner), +) +UJSON_ENCODE_KWARGS = { + "ensure_ascii": st.booleans(), + "encode_html_chars": st.booleans(), + "escape_forward_slashes": st.booleans(), + "sort_keys": st.booleans(), + "indent": st.integers(0, 20), +} + + +@given(obj=JSON_OBJECTS, kwargs=st.fixed_dictionaries(UJSON_ENCODE_KWARGS)) +def test_ujson_roundtrip(obj, kwargs): + """Check that all JSON objects round-trip regardless of other options.""" + assert obj == ujson.decode(ujson.encode(obj, **kwargs)) + + +if __name__ == "__main__": + # Running `pytest hypothesis_structured_fuzzer.py` will replay, deduplicate, + # and minimize any failures discovered by earlier runs or by OSS-Fuzz, or + # briefly search for new failures if none are known. + # Or, when running via OSS-Fuzz, we'll execute it via the fuzzing hook: + atheris.Setup(sys.argv, test_ujson_roundtrip.hypothesis.fuzz_one_input) + atheris.Fuzz() |