diff options
Diffstat (limited to 'base/i18n/streaming_utf8_validator_perftest.cc')
-rw-r--r-- | base/i18n/streaming_utf8_validator_perftest.cc | 238 |
1 files changed, 0 insertions, 238 deletions
diff --git a/base/i18n/streaming_utf8_validator_perftest.cc b/base/i18n/streaming_utf8_validator_perftest.cc deleted file mode 100644 index 6b8c17b654..0000000000 --- a/base/i18n/streaming_utf8_validator_perftest.cc +++ /dev/null @@ -1,238 +0,0 @@ -// Copyright 2014 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// All data that is passed through a WebSocket with type "Text" needs to be -// validated as UTF8. Since this is done on the IO thread, it needs to be -// reasonably fast. - -// We are only interested in the performance on valid UTF8. Invalid UTF8 will -// result in a connection failure, so is unlikely to become a source of -// performance issues. - -#include "base/i18n/streaming_utf8_validator.h" - -#include <string> - -#include "base/basictypes.h" -#include "base/bind.h" -#include "base/callback.h" -#include "base/strings/string_util.h" -#include "base/strings/stringprintf.h" -#include "base/test/perf_time_logger.h" -#include "testing/gtest/include/gtest/gtest.h" - -namespace base { -namespace { - -// We want to test ranges of valid UTF-8 sequences. These ranges are inclusive. -// They are intended to be large enough that the validator needs to do -// meaningful work while being in some sense "realistic" (eg. control characters -// are not included). -const char kOneByteSeqRangeStart[] = " "; // U+0020 -const char kOneByteSeqRangeEnd[] = "~"; // U+007E - -const char kTwoByteSeqRangeStart[] = "\xc2\xa0"; // U+00A0 non-breaking space -const char kTwoByteSeqRangeEnd[] = "\xc9\x8f"; // U+024F small y with stroke - -const char kThreeByteSeqRangeStart[] = "\xe3\x81\x82"; // U+3042 Hiragana "a" -const char kThreeByteSeqRangeEnd[] = "\xe9\xbf\x83"; // U+9FC3 "to blink" - -const char kFourByteSeqRangeStart[] = "\xf0\xa0\x80\x8b"; // U+2000B -const char kFourByteSeqRangeEnd[] = "\xf0\xaa\x9a\xb2"; // U+2A6B2 - -// The different lengths of strings to test. -const size_t kTestLengths[] = {1, 32, 256, 32768, 1 << 20}; - -// Simplest possible byte-at-a-time validator, to provide a baseline -// for comparison. This is only tried on 1-byte UTF-8 sequences, as -// the results will not be meaningful with sequences containing -// top-bit-set bytes. -bool IsString7Bit(const std::string& s) { - for (std::string::const_iterator it = s.begin(); it != s.end(); ++it) { - if (*it & 0x80) - return false; - } - return true; -} - -// Assumes that |previous| is a valid UTF-8 sequence, and attempts to return -// the next one. Is just barely smart enough to iterate through the ranges -// defined about. -std::string NextUtf8Sequence(const std::string& previous) { - DCHECK(StreamingUtf8Validator::Validate(previous)); - std::string next = previous; - for (int i = static_cast<int>(previous.length() - 1); i >= 0; --i) { - // All bytes in a UTF-8 sequence except the first one are - // constrained to the range 0x80 to 0xbf, inclusive. When we - // increment past 0xbf, we carry into the previous byte. - if (i > 0 && next[i] == '\xbf') { - next[i] = '\x80'; - continue; // carry - } - ++next[i]; - break; // no carry - } - DCHECK(StreamingUtf8Validator::Validate(next)) - << "Result \"" << next << "\" failed validation"; - return next; -} - -typedef bool (*TestTargetType)(const std::string&); - -// Run fuction |target| over |test_string| |times| times, and report the results -// using |description|. -bool RunTest(const std::string& description, - TestTargetType target, - const std::string& test_string, - int times) { - base::PerfTimeLogger timer(description.c_str()); - bool result = true; - for (int i = 0; i < times; ++i) { - result = target(test_string) && result; - } - timer.Done(); - return result; -} - -// Construct a string by repeating |input| enough times to equal or exceed -// |length|. -std::string ConstructRepeatedTestString(const std::string& input, - size_t length) { - std::string output = input; - while (output.length() * 2 < length) { - output += output; - } - if (output.length() < length) { - output += ConstructRepeatedTestString(input, length - output.length()); - } - return output; -} - -// Construct a string by expanding the range of UTF-8 sequences -// between |input_start| and |input_end|, inclusive, and then -// repeating the resulting string until it equals or exceeds |length| -// bytes. |input_start| and |input_end| must be valid UTF-8 -// sequences. -std::string ConstructRangedTestString(const std::string& input_start, - const std::string& input_end, - size_t length) { - std::string output = input_start; - std::string input = input_start; - while (output.length() < length && input != input_end) { - input = NextUtf8Sequence(input); - output += input; - } - if (output.length() < length) { - output = ConstructRepeatedTestString(output, length); - } - return output; -} - -struct TestFunctionDescription { - TestTargetType function; - const char* function_name; -}; - -bool IsStringUTF8(const std::string& str) { - return base::IsStringUTF8(base::StringPiece(str)); -} - -// IsString7Bit is intentionally placed last so it can be excluded easily. -const TestFunctionDescription kTestFunctions[] = { - {&StreamingUtf8Validator::Validate, "StreamingUtf8Validator"}, - {&IsStringUTF8, "IsStringUTF8"}, {&IsString7Bit, "IsString7Bit"}}; - -// Construct a test string from |construct_test_string| for each of the lengths -// in |kTestLengths| in turn. For each string, run each test in |test_functions| -// for a number of iterations such that the total number of bytes validated -// is around 16MB. -void RunSomeTests( - const char format[], - base::Callback<std::string(size_t length)> construct_test_string, - const TestFunctionDescription* test_functions, - size_t test_count) { - for (size_t i = 0; i < arraysize(kTestLengths); ++i) { - const size_t length = kTestLengths[i]; - const std::string test_string = construct_test_string.Run(length); - const int real_length = static_cast<int>(test_string.length()); - const int times = (1 << 24) / real_length; - for (size_t test_index = 0; test_index < test_count; ++test_index) { - EXPECT_TRUE(RunTest(StringPrintf(format, - test_functions[test_index].function_name, - real_length, - times), - test_functions[test_index].function, - test_string, - times)); - } - } -} - -TEST(StreamingUtf8ValidatorPerfTest, OneByteRepeated) { - RunSomeTests("%s: bytes=1 repeated length=%d repeat=%d", - base::Bind(ConstructRepeatedTestString, kOneByteSeqRangeStart), - kTestFunctions, - 3); -} - -TEST(StreamingUtf8ValidatorPerfTest, OneByteRange) { - RunSomeTests("%s: bytes=1 ranged length=%d repeat=%d", - base::Bind(ConstructRangedTestString, - kOneByteSeqRangeStart, - kOneByteSeqRangeEnd), - kTestFunctions, - 3); -} - -TEST(StreamingUtf8ValidatorPerfTest, TwoByteRepeated) { - RunSomeTests("%s: bytes=2 repeated length=%d repeat=%d", - base::Bind(ConstructRepeatedTestString, kTwoByteSeqRangeStart), - kTestFunctions, - 2); -} - -TEST(StreamingUtf8ValidatorPerfTest, TwoByteRange) { - RunSomeTests("%s: bytes=2 ranged length=%d repeat=%d", - base::Bind(ConstructRangedTestString, - kTwoByteSeqRangeStart, - kTwoByteSeqRangeEnd), - kTestFunctions, - 2); -} - -TEST(StreamingUtf8ValidatorPerfTest, ThreeByteRepeated) { - RunSomeTests( - "%s: bytes=3 repeated length=%d repeat=%d", - base::Bind(ConstructRepeatedTestString, kThreeByteSeqRangeStart), - kTestFunctions, - 2); -} - -TEST(StreamingUtf8ValidatorPerfTest, ThreeByteRange) { - RunSomeTests("%s: bytes=3 ranged length=%d repeat=%d", - base::Bind(ConstructRangedTestString, - kThreeByteSeqRangeStart, - kThreeByteSeqRangeEnd), - kTestFunctions, - 2); -} - -TEST(StreamingUtf8ValidatorPerfTest, FourByteRepeated) { - RunSomeTests("%s: bytes=4 repeated length=%d repeat=%d", - base::Bind(ConstructRepeatedTestString, kFourByteSeqRangeStart), - kTestFunctions, - 2); -} - -TEST(StreamingUtf8ValidatorPerfTest, FourByteRange) { - RunSomeTests("%s: bytes=4 ranged length=%d repeat=%d", - base::Bind(ConstructRangedTestString, - kFourByteSeqRangeStart, - kFourByteSeqRangeEnd), - kTestFunctions, - 2); -} - -} // namespace -} // namespace base |