aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--port/protobuf.h1
-rw-r--r--src/CMakeLists.txt4
-rw-r--r--src/mutator.cc1
-rw-r--r--src/utf8_fix.cc90
-rw-r--r--src/utf8_fix.h28
-rw-r--r--src/utf8_fix_test.cc63
6 files changed, 185 insertions, 2 deletions
diff --git a/port/protobuf.h b/port/protobuf.h
index be3692d..eefe415 100644
--- a/port/protobuf.h
+++ b/port/protobuf.h
@@ -20,6 +20,7 @@
#include "google/protobuf/message.h"
#include "google/protobuf/text_format.h"
#include "google/protobuf/util/message_differencer.h"
+#include "google/protobuf/wire_format.h"
namespace protobuf_mutator {
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 5486f23..8fcfdb8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -15,9 +15,10 @@
add_subdirectory(libfuzzer)
add_library(protobuf-mutator
+ binary_format.cc
mutator.cc
text_format.cc
- binary_format.cc)
+ utf8_fix.cc)
target_link_libraries(protobuf-mutator
${PROTOBUF_LIBRARY})
set_property(TARGET protobuf-mutator
@@ -29,6 +30,7 @@ protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS
add_executable(mutator_test
mutator_test.cc
+ utf8_fix_test.cc
weighted_reservoir_sampler_test.cc
${PROTO_SRCS})
target_link_libraries(mutator_test
diff --git a/src/mutator.cc b/src/mutator.cc
index faa942f..8ec240c 100644
--- a/src/mutator.cc
+++ b/src/mutator.cc
@@ -15,7 +15,6 @@
#include "src/mutator.h"
#include <algorithm>
-#include <iostream>
#include <map>
#include <random>
#include <string>
diff --git a/src/utf8_fix.cc b/src/utf8_fix.cc
new file mode 100644
index 0000000..50e6efd
--- /dev/null
+++ b/src/utf8_fix.cc
@@ -0,0 +1,90 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utf8_fix.h"
+
+#include <algorithm>
+#include <cassert>
+
+namespace protobuf_mutator {
+
+namespace {
+
+// Encodes |code| as a UTF-8 sequence of |size| bytes that ends immediately
+// before |e|, filling it from the last byte towards the first. |prefix| holds
+// the marker bits of the leading byte (0 for a one-byte sequence).
+void StoreCode(char* e, char32_t code, uint8_t size, uint8_t prefix) {
+  // Every byte after the first has the form 10xxxxxx and carries the next six
+  // low bits of |code|.
+  for (uint8_t trailing = size - 1; trailing != 0; --trailing) {
+    --e;
+    *e = 0x80 | (code & 0x3F);
+    code >>= 6;
+  }
+  // Whatever is left of |code| shares the leading byte with |prefix|.
+  --e;
+  *e = prefix | code;
+}
+
+// Rewrites, in place, the byte sequence starting at |b| so that it forms one
+// structurally valid UTF-8 code point, and returns a pointer just past it.
+//
+// The sequence is the byte at |b| plus up to three following continuation
+// bytes (10xxxxxx) that lie before |e|; its length is preserved. The payload
+// bits are kept when they already denote a code point that is legal for that
+// length, otherwise a replacement is drawn from |random|. Requires b < e.
+char* FixCode(char* b, const char* e, RandomEngine* random) {
+  // std::uniform_int_distribution is only specified for the standard integer
+  // types (short, int, long, long long and their unsigned forms). char32_t is
+  // not one of them, so replacement code points are drawn as uint32_t.
+  using CodeDistribution = std::uniform_int_distribution<uint32_t>;
+
+  const char* start = b;
+  assert(b < e);
+
+  // A UTF-8 sequence is at most four bytes long. Clamp by distance so that no
+  // pointer beyond the end of the buffer is ever formed.
+  if (e - b > 4) e = b + 4;
+
+  // Read the lead byte as unsigned so a set high bit is not sign-extended.
+  char32_t c = static_cast<unsigned char>(*b++);
+  for (; b < e && (*b & 0xC0) == 0x80; ++b) {
+    c = (c << 6) + (*b & 0x3F);
+  }
+  const uint8_t size = static_cast<uint8_t>(b - start);
+  switch (size) {
+    case 1:
+      // A lone byte must be ASCII: drop the high bit.
+      c &= 0x7F;
+      StoreCode(b, c, size, 0);
+      break;
+    case 2:
+      c &= 0x7FF;
+      // Values below 0x80 would be an overlong encoding.
+      if (c < 0x80) {
+        c = CodeDistribution(0x80, 0x7FF)(*random);
+      }
+      StoreCode(b, c, size, 0xC0);
+      break;
+    case 3:
+      c &= 0xFFFF;
+
+      // Values below 0x800 would be overlong, and [0xD800, 0xE000) are
+      // reserved for UTF-16 surrogate halves.
+      if (c < 0x800 || (c >= 0xD800 && c < 0xE000)) {
+        const uint32_t halves = 0xE000 - 0xD800;
+        // Draw from a range with the surrogate gap removed, then shift the
+        // upper part past the gap.
+        c = CodeDistribution(0x800, 0xFFFF - halves)(*random);
+        if (c >= 0xD800) c += halves;
+      }
+      StoreCode(b, c, size, 0xE0);
+      break;
+    case 4:
+      c &= 0x1FFFFF;
+      // Values below 0x10000 would be overlong; Unicode ends at 0x10FFFF.
+      if (c < 0x10000 || c > 0x10FFFF) {
+        c = CodeDistribution(0x10000, 0x10FFFF)(*random);
+      }
+      StoreCode(b, c, size, 0xF0);
+      break;
+    default:
+      assert(false && "Unexpected size of UTF-8 sequence");
+  }
+  return b;
+}
+
+} // namespace
+
+// Walks |str| one UTF-8 sequence at a time and lets FixCode() repair each of
+// them in place, using |random| whenever a replacement code point is needed.
+// An empty string is left untouched.
+void FixUtf8String(std::string* str, RandomEngine* random) {
+  const std::string::size_type length = str->size();
+  if (length == 0) return;
+
+  char* const first = &str->front();
+  const char* const last = first + length;
+  // FixCode() returns the position just past the sequence it handled.
+  for (char* cursor = first; cursor < last;) {
+    cursor = FixCode(cursor, last, random);
+  }
+}
+
+} // namespace protobuf_mutator
diff --git a/src/utf8_fix.h b/src/utf8_fix.h
new file mode 100644
index 0000000..6637e74
--- /dev/null
+++ b/src/utf8_fix.h
@@ -0,0 +1,28 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef SRC_UTF8_FIX_H_
+#define SRC_UTF8_FIX_H_
+
+#include <string>
+
+#include "src/random.h"
+
+namespace protobuf_mutator {
+
+void FixUtf8String(std::string* str, RandomEngine* random);
+
+} // namespace protobuf_mutator
+
+#endif // SRC_UTF8_FIX_H_
diff --git a/src/utf8_fix_test.cc b/src/utf8_fix_test.cc
new file mode 100644
index 0000000..54f09cd
--- /dev/null
+++ b/src/utf8_fix_test.cc
@@ -0,0 +1,63 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "src/utf8_fix.h"
+
+#include "port/gtest.h"
+#include "port/protobuf.h"
+
+namespace protobuf_mutator {
+
+protobuf::LogSilencer log_silincer;  // NOTE(review): presumably mutes protobuf log output during the tests - confirm
+
+// Fixture for the UTF-8 fixer tests; the int parameter is available to
+// parameterized cases through GetParam().
+class FixUtf8StringTest : public ::testing::TestWithParam<int> {
+ public:
+  // Returns what protobuf's own UTF-8 verifier reports for the bytes of |s|.
+  bool IsStructurallyValid(const std::string& s) {
+    typedef protobuf::internal::WireFormatLite Wire;
+    const bool valid =
+        Wire::VerifyUtf8String(s.data(), s.size(), Wire::PARSE, "");
+    return valid;
+  }
+};
+
+TEST_F(FixUtf8StringTest, IsStructurallyValid) {  // Sanity-checks the validity helper on known inputs.
+ EXPECT_TRUE(IsStructurallyValid(""));  // Empty string.
+ EXPECT_TRUE(IsStructurallyValid("abc"));  // Plain ASCII.
+ EXPECT_TRUE(IsStructurallyValid("\xC2\xA2"));  // Two-byte sequence (U+00A2).
+ EXPECT_TRUE(IsStructurallyValid("\xE2\x82\xAC"));  // Three-byte sequence (U+20AC).
+ EXPECT_TRUE(IsStructurallyValid("\xF0\x90\x8D\x88"));  // Four-byte sequence (U+10348).
+ EXPECT_FALSE(IsStructurallyValid("\xFF\xFF\xFF\xFF"));  // 0xFF never occurs in UTF-8.
+ EXPECT_FALSE(IsStructurallyValid("\xFF\x8F"));  // Invalid lead byte plus a continuation byte.
+ EXPECT_FALSE(IsStructurallyValid("\x3F\xBF"));  // ASCII byte followed by a stray continuation byte.
+}
+
+INSTANTIATE_TEST_CASE_P(Size, FixUtf8StringTest, ::testing::Range(0, 10));
+
+// Fills a randomly sized buffer with random bytes many times over and checks
+// that FixUtf8String() leaves structurally valid input untouched and turns
+// everything else into structurally valid UTF-8. The test parameter seeds the
+// random engine.
+TEST_P(FixUtf8StringTest, FixUtf8String) {
+  RandomEngine random(GetParam());
+  // std::uniform_int_distribution is not specified for character types such
+  // as uint8_t, so draw ints in the byte range and narrow explicitly.
+  std::uniform_int_distribution<int> random8(0, 0xFF);
+
+  std::string str(static_cast<size_t>(random8(random)), '\0');
+  for (uint32_t run = 0; run < 10000; ++run) {
+    for (char& ch : str) ch = static_cast<char>(random8(random));
+    std::string fixed = str;
+    FixUtf8String(&fixed, &random);
+    if (IsStructurallyValid(str)) {
+      // Already valid input must come back unchanged.
+      EXPECT_EQ(str, fixed);
+    } else {
+      EXPECT_TRUE(IsStructurallyValid(fixed));
+    }
+  }
+}
+
+} // namespace protobuf_mutator