diff options
author | Samuel Huang <huangs@chromium.org> | 2018-03-13 18:19:34 +0000 |
---|---|---|
committer | Edward Lesmes <ehmaldonado@google.com> | 2021-07-23 21:50:59 +0000 |
commit | 06f1ae9aaca969ee95ef840f22b6b461c304542d (patch) | |
tree | f1e5c6624e70628e81fbf38d6cd14b974abe5d93 /suffix_array_unittest.cc | |
download | zucchini-06f1ae9aaca969ee95ef840f22b6b461c304542d.tar.gz |
[Zucchini] Move Zucchini from /chrome/installer/ to /components/.
(Use "git log --follow" to see older revisions of files).
/components/ is the most logical place to put Zucchini, which only
depends on /base and /testing/gtest. This move also enables Zucchini to
be used by the Component Updater. Details:
- Move all files; run the following to change deps and guards:
sed 's/chrome\/installer/components/' *.cc *.h -i
sed 's/CHROME_INSTALLER/COMPONENTS/' *.cc *.h -i
- Sorting works out pretty well!
- Change all 'chrome/installer/zucchini' to 'components/zucchini'
throughout other parts of the repo; sort if necessary.
- Fix 6 'git cl lint' errors.
- Change 1 Bind() usage to BindRepeated().
- Update OWNER.
Bug: 729154
Change-Id: I50c5a7d411ea85f707b5994ab319dfb2a1acccf7
Reviewed-on: https://chromium-review.googlesource.com/954923
Reviewed-by: Greg Thompson <grt@chromium.org>
Reviewed-by: Jochen Eisinger <jochen@chromium.org>
Reviewed-by: Samuel Huang <huangs@chromium.org>
Commit-Queue: Samuel Huang <huangs@chromium.org>
Cr-Commit-Position: refs/heads/master@{#542857}
NOKEYCHECK=True
GitOrigin-RevId: 577ef6c435e8d43be6e3e60ccbcbd1881780f4ec
Diffstat (limited to 'suffix_array_unittest.cc')
-rw-r--r-- | suffix_array_unittest.cc | 331 |
1 files changed, 331 insertions, 0 deletions
diff --git a/suffix_array_unittest.cc b/suffix_array_unittest.cc new file mode 100644 index 0000000..c6f8b02 --- /dev/null +++ b/suffix_array_unittest.cc @@ -0,0 +1,331 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/suffix_array.h" + +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> +#include <initializer_list> +#include <string> +#include <vector> + +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +using SLType = InducedSuffixSort::SLType; + +} // namespace + +using ustring = std::basic_string<unsigned char>; + +constexpr uint16_t kNumChar = 256; + +ustring MakeUnsignedString(const std::string& str) { + return {str.begin(), str.end()}; +} + +template <class T> +std::vector<T> MakeVector(const std::initializer_list<T>& ilist) { + return {ilist.begin(), ilist.end()}; +} + +void TestSlPartition(std::initializer_list<SLType> expected_sl_partition, + std::initializer_list<size_t> expected_lms_indices, + std::string str) { + using SaisImpl = InducedSuffixSort::Implementation<size_t, uint16_t>; + + std::vector<SLType> sl_partition(str.size()); + EXPECT_EQ(expected_lms_indices.size(), + SaisImpl::BuildSLPartition(str.begin(), str.size(), kNumChar, + sl_partition.rbegin())); + EXPECT_EQ(MakeVector(expected_sl_partition), sl_partition); + + std::vector<size_t> lms_indices(expected_lms_indices.size()); + SaisImpl::FindLmsSuffixes(expected_sl_partition, lms_indices.begin()); + EXPECT_EQ(MakeVector(expected_lms_indices), lms_indices); +} + +TEST(InducedSuffixSortTest, BuildSLPartition) { + TestSlPartition({}, {}, ""); + TestSlPartition( + { + SLType::LType, + }, + {}, "a"); + TestSlPartition( + { + SLType::LType, SLType::LType, + }, + {}, "ba"); + TestSlPartition( + { + SLType::SType, SLType::LType, + }, + {}, "ab"); + TestSlPartition( + { + SLType::SType, SLType::SType, SLType::LType, + }, + {}, "aab"); + TestSlPartition( + { + SLType::LType, SLType::LType, SLType::LType, + }, + {}, "bba"); + TestSlPartition( + { + SLType::LType, SLType::SType, SLType::LType, + }, + {1}, "bab"); + TestSlPartition( + { + SLType::LType, SLType::SType, SLType::SType, SLType::LType, + }, + {1}, "baab"); + + TestSlPartition( + { + SLType::LType, // zucchini + SLType::LType, // ucchini + SLType::SType, // cchini + SLType::SType, // chini + SLType::SType, // hini + SLType::SType, // ini + SLType::LType, // ni + SLType::LType, // i + }, + {2}, "zucchini"); +} + +std::vector<size_t> BucketCount(const std::initializer_list<unsigned char> str, + uint16_t max_key) { + using SaisImpl = InducedSuffixSort::Implementation<size_t, uint16_t>; + return SaisImpl::MakeBucketCount(str.begin(), str.size(), max_key); +} + +TEST(InducedSuffixSortTest, BucketCount) { + using vec = std::vector<size_t>; + + EXPECT_EQ(vec({0, 0, 0, 0}), BucketCount({}, 4)); + EXPECT_EQ(vec({1, 0, 0, 0}), BucketCount({0}, 4)); + EXPECT_EQ(vec({0, 2, 0, 1}), BucketCount({1, 1, 3}, 4)); +} + +std::vector<size_t> InducedSortSubstring(ustring str) { + using SaisImpl = InducedSuffixSort::Implementation<size_t, uint16_t>; + std::vector<SLType> sl_partition(str.size()); + size_t lms_count = SaisImpl::BuildSLPartition( + str.begin(), str.size(), kNumChar, sl_partition.rbegin()); + std::vector<size_t> lms_indices(lms_count); + SaisImpl::FindLmsSuffixes(sl_partition, lms_indices.begin()); + auto buckets = SaisImpl::MakeBucketCount(str.begin(), str.size(), kNumChar); + + std::vector<size_t> suffix_array(str.size()); + SaisImpl::InducedSort(str, str.size(), sl_partition, lms_indices, buckets, + suffix_array.begin()); + + return suffix_array; +} + +TEST(InducedSuffixSortTest, InducedSortSubstring) { + using vec = std::vector<size_t>; + + auto us = MakeUnsignedString; + + // L; a$ + EXPECT_EQ(vec({0}), InducedSortSubstring(us("a"))); + + // SL; ab$, b$ + EXPECT_EQ(vec({0, 1}), InducedSortSubstring(us("ab"))); + + // LL; a$, ba$ + EXPECT_EQ(vec({1, 0}), InducedSortSubstring(us("ba"))); + + // SLL; a$, aba$, ba$ + EXPECT_EQ(vec({2, 0, 1}), InducedSortSubstring(us("aba"))); + + // LSL; ab$, b$, ba + EXPECT_EQ(vec({1, 2, 0}), InducedSortSubstring(us("bab"))); + + // SSL; aab$, ab$, b$ + EXPECT_EQ(vec({0, 1, 2}), InducedSortSubstring(us("aab"))); + + // LSSL; aab$, ab$, b$, ba + EXPECT_EQ(vec({1, 2, 3, 0}), InducedSortSubstring(us("baab"))); +} + +template <class Algorithm> +void TestSuffixSort(ustring test_str) { + std::vector<size_t> suffix_array = + MakeSuffixArray<Algorithm>(test_str, kNumChar); + EXPECT_EQ(test_str.size(), suffix_array.size()); + + // Expect that I[] is a permutation of [0, len]. + std::vector<size_t> sorted_suffix(suffix_array.begin(), suffix_array.end()); + std::sort(sorted_suffix.begin(), sorted_suffix.end()); + for (size_t i = 0; i < test_str.size(); ++i) + EXPECT_EQ(i, sorted_suffix[i]); + + // Expect that all suffixes are strictly ordered. + auto end = test_str.end(); + for (size_t i = 1; i < test_str.size(); ++i) { + auto suf1 = test_str.begin() + suffix_array[i - 1]; + auto suf2 = test_str.begin() + suffix_array[i]; + bool is_less = std::lexicographical_compare(suf1, end, suf2, end); + EXPECT_TRUE(is_less); + } +} + +constexpr const char* test_strs[] = { + "", + "a", + "aa", + "za", + "CACAO", + "aaaaa", + "banana", + "tobeornottobe", + "The quick brown fox jumps over the lazy dog.", + "elephantelephantelephantelephantelephant", + "walawalawashington", + "-------------------------", + "011010011001011010010110011010010", + "3141592653589793238462643383279502884197169399375105", + "\xFF\xFE\xFF\xFE\xFD\x80\x30\x31\x32\x80\x30\xFF\x01\xAB\xCD", + "abccbaabccbaabccbaabccbaabccbaabccbaabccbaabccba", + "0123456789876543210", + "9876543210123456789", + "aababcabcdabcdeabcdefabcdefg", + "asdhklgalksdjghalksdjghalksdjgh", +}; + +TEST(SuffixSortTest, NaiveSuffixSort) { + for (const std::string& test_str : test_strs) { + TestSuffixSort<NaiveSuffixSort>(MakeUnsignedString(test_str)); + } +} + +TEST(SuffixSortTest, InducedSuffixSortSort) { + for (const std::string& test_str : test_strs) { + TestSuffixSort<InducedSuffixSort>(MakeUnsignedString(test_str)); + } +} + +// Test with sequence that has every character. +TEST(SuffixSortTest, AllChar) { + std::vector<unsigned char> all_char(kNumChar); + std::iota(all_char.begin(), all_char.end(), 0); + + { + std::vector<size_t> suffix_array = + MakeSuffixArray<InducedSuffixSort>(all_char, kNumChar); + for (size_t i = 0; i < kNumChar; ++i) + EXPECT_EQ(i, suffix_array[i]); + } + + std::vector<unsigned char> all_char_reverse(all_char.rbegin(), + all_char.rend()); + { + std::vector<size_t> suffix_array = + MakeSuffixArray<InducedSuffixSort>(all_char_reverse, kNumChar); + for (size_t i = 0; i < kNumChar; ++i) + EXPECT_EQ(kNumChar - i - 1, suffix_array[i]); + } +} + +void TestSuffixLowerBound(ustring base_str, ustring search_str) { + std::vector<size_t> suffix_array = + MakeSuffixArray<NaiveSuffixSort>(base_str, kNumChar); + + auto pos = SuffixLowerBound(suffix_array, base_str.begin(), + search_str.begin(), search_str.end()); + + auto end = base_str.end(); + if (pos != suffix_array.begin()) { + // Previous suffix is less than |search_str|. + auto suf = base_str.begin() + pos[-1]; + bool is_less = std::lexicographical_compare(suf, end, search_str.begin(), + search_str.end()); + EXPECT_TRUE(is_less); + } + if (pos != suffix_array.end()) { + // Current suffix is greater of equal to |search_str|. + auto suf = base_str.begin() + *pos; + bool is_less = std::lexicographical_compare(suf, end, search_str.begin(), + search_str.end()); + EXPECT_FALSE(is_less); + } +} + +TEST(SuffixArrayTest, LowerBound) { + auto us = MakeUnsignedString; + + TestSuffixLowerBound(us(""), us("")); + TestSuffixLowerBound(us(""), us("a")); + TestSuffixLowerBound(us("b"), us("")); + TestSuffixLowerBound(us("b"), us("a")); + TestSuffixLowerBound(us("b"), us("c")); + TestSuffixLowerBound(us("b"), us("bc")); + TestSuffixLowerBound(us("aa"), us("a")); + TestSuffixLowerBound(us("aa"), us("aa")); + + ustring sentence = us("the quick brown fox jumps over the lazy dog."); + // Entire string: exact and unique. + TestSuffixLowerBound(sentence, sentence); + // Empty string: exact and non-unique. + TestSuffixLowerBound(sentence, us("")); + // Exact and unique suffix matches. + TestSuffixLowerBound(sentence, us(".")); + TestSuffixLowerBound(sentence, us("the lazy dog.")); + // Exact and unique non-suffix matches. + TestSuffixLowerBound(sentence, us("quick")); + TestSuffixLowerBound(sentence, us("the quick")); + // Partial and unique matches. + TestSuffixLowerBound(sentence, us("fox jumps with the hosps")); + TestSuffixLowerBound(sentence, us("xyz")); + // Exact and non-unique match: take lexicographical first. + TestSuffixLowerBound(sentence, us("the")); + TestSuffixLowerBound(sentence, us(" ")); + // Partial and non-unique match. + // query < "the l"... < "the q"... + TestSuffixLowerBound(sentence, us("the apple")); + // "the l"... < query < "the q"... + TestSuffixLowerBound(sentence, us("the opera")); + // "the l"... < "the q"... < query + TestSuffixLowerBound(sentence, us("the zebra")); + // Prefix match dominates suffix match (unique). + TestSuffixLowerBound(sentence, us("over quick brown fox")); + // Empty matchs. + TestSuffixLowerBound(sentence, us(",")); + TestSuffixLowerBound(sentence, us("1234")); + TestSuffixLowerBound(sentence, us("THE QUICK BROWN FOX")); + TestSuffixLowerBound(sentence, us("(the")); +} + +TEST(SuffixArrayTest, LowerBoundExact) { + for (const std::string& test_str : test_strs) { + ustring test_ustr = MakeUnsignedString(test_str); + + std::vector<size_t> suffix_array = + MakeSuffixArray<InducedSuffixSort>(test_ustr, kNumChar); + + for (size_t lo = 0; lo < test_str.size(); ++lo) { + for (size_t hi = lo + 1; hi <= test_str.size(); ++hi) { + ustring query(test_ustr.begin() + lo, test_ustr.begin() + hi); + ASSERT_EQ(query.size(), hi - lo); + auto pos = SuffixLowerBound(suffix_array, test_ustr.begin(), + query.begin(), query.end()); + EXPECT_TRUE( + std::equal(query.begin(), query.end(), test_ustr.begin() + *pos)); + } + } + } +} + +} // namespace zucchini |