diff options
-rw-r--r-- | port/protobuf.h | 1 | ||||
-rw-r--r-- | src/CMakeLists.txt | 4 | ||||
-rw-r--r-- | src/mutator.cc | 1 | ||||
-rw-r--r-- | src/utf8_fix.cc | 90 | ||||
-rw-r--r-- | src/utf8_fix.h | 28 | ||||
-rw-r--r-- | src/utf8_fix_test.cc | 63 |
6 files changed, 185 insertions, 2 deletions
diff --git a/port/protobuf.h b/port/protobuf.h index be3692d..eefe415 100644 --- a/port/protobuf.h +++ b/port/protobuf.h @@ -20,6 +20,7 @@ #include "google/protobuf/message.h" #include "google/protobuf/text_format.h" #include "google/protobuf/util/message_differencer.h" +#include "google/protobuf/wire_format.h" namespace protobuf_mutator { diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5486f23..8fcfdb8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -15,9 +15,10 @@ add_subdirectory(libfuzzer) add_library(protobuf-mutator + binary_format.cc mutator.cc text_format.cc - binary_format.cc) + utf8_fix.cc) target_link_libraries(protobuf-mutator ${PROTOBUF_LIBRARY}) set_property(TARGET protobuf-mutator @@ -29,6 +30,7 @@ protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS add_executable(mutator_test mutator_test.cc + utf8_fix_test.cc weighted_reservoir_sampler_test.cc ${PROTO_SRCS}) target_link_libraries(mutator_test diff --git a/src/mutator.cc b/src/mutator.cc index faa942f..8ec240c 100644 --- a/src/mutator.cc +++ b/src/mutator.cc @@ -15,7 +15,6 @@ #include "src/mutator.h" #include <algorithm> -#include <iostream> #include <map> #include <random> #include <string> diff --git a/src/utf8_fix.cc b/src/utf8_fix.cc new file mode 100644 index 0000000..50e6efd --- /dev/null +++ b/src/utf8_fix.cc @@ -0,0 +1,90 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "src/utf8_fix.h" + +#include <algorithm> +#include <cassert> + +namespace protobuf_mutator { + +namespace { + +void StoreCode(char* e, char32_t code, uint8_t size, uint8_t prefix) { + while (--size) { + *(--e) = 0x80 | (code & 0x3F); + code >>= 6; + } + *(--e) = prefix | code; +} + +char* FixCode(char* b, const char* e, RandomEngine* random) { + const char* start = b; + assert(b < e); + + e = std::min<const char*>(e, b + 4); + char32_t c = *b++; + for (; b < e && (*b & 0xC0) == 0x80; ++b) { + c = (c << 6) + (*b & 0x3F); + } + uint8_t size = b - start; + switch (size) { + case 1: + c &= 0x7F; + StoreCode(b, c, size, 0); + break; + case 2: + c &= 0x7FF; + if (c < 0x80) { + c = std::uniform_int_distribution<char32_t>(0x80, 0x7FF)(*random); + } + StoreCode(b, c, size, 0xC0); + break; + case 3: + c &= 0xFFFF; + + // [0xD800, 0xE000) are reserved for UTF-16 surrogate halves. + if (c < 0x800 || (c >= 0xD800 && c < 0xE000)) { + uint32_t halves = 0xE000 - 0xD800; + c = std::uniform_int_distribution<char32_t>(0x800, + 0xFFFF - halves)(*random); + if (c >= 0xD800) c += halves; + } + StoreCode(b, c, size, 0xE0); + break; + case 4: + c &= 0x1FFFFF; + if (c < 0x10000 || c > 0x10FFFF) { + c = std::uniform_int_distribution<char32_t>(0x10000, 0x10FFFF)(*random); + } + StoreCode(b, c, size, 0xF0); + break; + default: + assert(false && "Unexpected size of UTF-8 sequence"); + } + return b; +} + +} // namespace + +void FixUtf8String(std::string* str, RandomEngine* random) { + if (str->empty()) return; + char* b = &(*str)[0]; + const char* e = b + str->size(); + while (b < e) { + b = FixCode(b, e, random); + } +} + +} // namespace protobuf_mutator diff --git a/src/utf8_fix.h b/src/utf8_fix.h new file mode 100644 index 0000000..6637e74 --- /dev/null +++ b/src/utf8_fix.h @@ -0,0 +1,28 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SRC_UTF8_FIX_H_ +#define SRC_UTF8_FIX_H_ + +#include <string> + +#include "src/random.h" + +namespace protobuf_mutator { + +void FixUtf8String(std::string* str, RandomEngine* random); + +} // namespace protobuf_mutator + +#endif // SRC_UTF8_FIX_H_ diff --git a/src/utf8_fix_test.cc b/src/utf8_fix_test.cc new file mode 100644 index 0000000..54f09cd --- /dev/null +++ b/src/utf8_fix_test.cc @@ -0,0 +1,63 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "src/utf8_fix.h" + +#include "port/gtest.h" +#include "port/protobuf.h" + +namespace protobuf_mutator { + +protobuf::LogSilencer log_silincer; + +class FixUtf8StringTest : public ::testing::TestWithParam<int> { + public: + bool IsStructurallyValid(const std::string& s) { + using protobuf::internal::WireFormatLite; + return WireFormatLite::VerifyUtf8String(s.data(), s.length(), + WireFormatLite::PARSE, ""); + } +}; + +TEST_F(FixUtf8StringTest, IsStructurallyValid) { + EXPECT_TRUE(IsStructurallyValid("")); + EXPECT_TRUE(IsStructurallyValid("abc")); + EXPECT_TRUE(IsStructurallyValid("\xC2\xA2")); + EXPECT_TRUE(IsStructurallyValid("\xE2\x82\xAC")); + EXPECT_TRUE(IsStructurallyValid("\xF0\x90\x8D\x88")); + EXPECT_FALSE(IsStructurallyValid("\xFF\xFF\xFF\xFF")); + EXPECT_FALSE(IsStructurallyValid("\xFF\x8F")); + EXPECT_FALSE(IsStructurallyValid("\x3F\xBF")); +} + +INSTANTIATE_TEST_CASE_P(Size, FixUtf8StringTest, ::testing::Range(0, 10)); + +TEST_P(FixUtf8StringTest, FixUtf8String) { + RandomEngine random(GetParam()); + std::uniform_int_distribution<uint8_t> random8(0, 0xFF); + + std::string str(random8(random), 0); + for (uint32_t run = 0; run < 10000; ++run) { + for (size_t i = 0; i < str.size(); ++i) str[i] = random8(random); + std::string fixed = str; + FixUtf8String(&fixed, &random); + if (IsStructurallyValid(str)) { + EXPECT_EQ(str, fixed); + } else { + EXPECT_TRUE(IsStructurallyValid(fixed)); + } + } +} + +} // namespace protobuf_mutator |