#!/usr/bin/python3 # Copyright 2021 Zac Hatfield-Dodds # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """This fuzzer is an example harness using Hypothesis for structured inputs. It would be possible, though more difficult, to write this test in terms of Atheris' `FuzzedDataProvider` instead of Hypothesis strategies. As well as defining structured inputs however, the call to `test_ujson_roundtrip()` will replay, deduplicate, and minimize any known failing examples from previous runs - which is great when debugging. Hypothesis uses a separate cache to Atheris/LibFuzzer seeds, so this is strictly complementary to your traditional fuzzing workflow. For more details on Hypothesis, see: https://hypothesis.readthedocs.io/en/latest/data.html https://hypothesis.readthedocs.io/en/latest/details.html#use-with-external-fuzzers """ import sys import atheris import ujson from hypothesis import given, strategies as st # We could define all these inline within the call to @given(), # but it's a bit easier to read if we name them here instead. JSON_ATOMS = st.one_of( st.none(), st.booleans(), st.integers(min_value=-(2 ** 63), max_value=2 ** 63 - 1), st.floats(allow_nan=False, allow_infinity=False), st.text(), ) JSON_OBJECTS = st.recursive( base=JSON_ATOMS, extend=lambda inner: st.lists(inner) | st.dictionaries(st.text(), inner), ) UJSON_ENCODE_KWARGS = { "ensure_ascii": st.booleans(), "encode_html_chars": st.booleans(), "escape_forward_slashes": st.booleans(), "sort_keys": st.booleans(), "indent": st.integers(0, 20), } @given(obj=JSON_OBJECTS, kwargs=st.fixed_dictionaries(UJSON_ENCODE_KWARGS)) @atheris.instrument_func def test_ujson_roundtrip(obj, kwargs): """Check that all JSON objects round-trip regardless of other options.""" assert obj == ujson.decode(ujson.encode(obj, **kwargs)) if __name__ == "__main__": # Running `pytest hypothesis_structured_fuzzer.py` will replay, deduplicate, # and minimize any failures discovered by earlier runs or by OSS-Fuzz, or # briefly search for new failures if none are known. # Or, when running via OSS-Fuzz, we'll execute it via the fuzzing hook: atheris.Setup(sys.argv, atheris.instrument_func(test_ujson_roundtrip.hypothesis.fuzz_one_input)) atheris.Fuzz()