// Copyright 2020 The Pigweed Authors // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy of // the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations under // the License. #include "pw_tokenizer/token_database.h" #include #include #include #include "gtest/gtest.h" namespace pw::tokenizer { namespace { using namespace std::literals::string_view_literals; // Use alignas to ensure that the data is properly aligned for database entries. // This avoids unaligned memory reads. alignas(TokenDatabase::RawEntry) constexpr char kBasicData[] = "TOKENS\0\0\x03\x00\x00\x00\0\0\0\0" "\x01\0\0\0\0\0\0\0" "\x02\0\0\0\0\0\0\0" "\xFF\0\0\0\0\0\0\0" "hi!\0" "goodbye\0" ":)"; alignas(TokenDatabase::RawEntry) constexpr char kEmptyData[] = "TOKENS\0\0\x00\x00\x00\x00\0\0\0"; // Last byte is null terminator. alignas(TokenDatabase::RawEntry) constexpr char kBadMagic[] = "TOKENs\0\0\x03\x00\x00\x00\0\0\0\0" "\x01\0\0\0\0\0\0\0" "hi!\0"; alignas(TokenDatabase::RawEntry) constexpr char kBadVersion[] = "TOKENS\0\1\x00\0\0\0\0\0\0\0"; alignas(TokenDatabase::RawEntry) constexpr char kBadEntryCount[] = "TOKENS\0\0\xff\x00\x00\x00\0\0\0\0"; // Use signed data and a size with the top bit set to test that the entry count // is read correctly, without per-byte sign extension. alignas(TokenDatabase::RawEntry) constexpr signed char kSignedWithTopBit[] = "TOKENS\0\0\x80\x00\x00\x00\0\0\0\0" // Entries "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" // 32 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" // 64 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" // 96 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" // 128 // Strings (empty) "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" // 32 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" // 64 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" // 96 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; // 128 constexpr TokenDatabase kBasicDatabase = TokenDatabase::Create(); static_assert(kBasicDatabase.size() == 3u); TEST(TokenDatabase, EntryCount) { static_assert(TokenDatabase::Create().size() == 3u); static_assert(TokenDatabase::Create(kEmptyData).size() == 0u); EXPECT_EQ(TokenDatabase::Create().size(), 0x80u); } TEST(TokenDatabase, ValidCheck) { char basic_data[sizeof(kBasicData)]; std::memcpy(basic_data, kBasicData, sizeof(basic_data)); EXPECT_TRUE(TokenDatabase::IsValid(basic_data)); static_assert(TokenDatabase::IsValid(kBasicData)); static_assert(TokenDatabase::IsValid(kEmptyData)); static_assert(TokenDatabase::IsValid(kSignedWithTopBit)); static_assert(!TokenDatabase::IsValid(kBadMagic)); static_assert(!TokenDatabase::IsValid(kBadVersion)); static_assert(!TokenDatabase::IsValid(kBadEntryCount)); static_assert(!TokenDatabase::IsValid("TOKENS\0\0\0\0")); // too short static_assert(!TokenDatabase::IsValid("TOKENS\0\1\0\0\0\0\0\0\0\0")); static_assert(!TokenDatabase::IsValid("TOKENSv0\0\0\0\0\0\0\0\0")); static_assert(!TokenDatabase::IsValid("tokens\0\0\0\0\0\0\0\0\0\0")); // No string table; this is one byte too short. static_assert( !TokenDatabase::IsValid("TOKENS\0\0\x01\x00\x00\x00\0\0\0\0WXYZdate"sv)); // Add one byte for the string table. static_assert( TokenDatabase::IsValid("TOKENS\0\0\x01\x00\x00\x00\0\0\0\0WXYZdate\0"sv)); static_assert( !TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0" "WXYZdate" "WXYZdate" "\0"sv)); static_assert( TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0" "WXYZdate" "WXYZdate" "hi\0\0"sv)); static_assert( TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0" "WXYZdate" "WXYZdate" "hi\0hello\0"sv)); } TEST(TokenDatabase, Iterator) { auto it = kBasicDatabase.begin(); EXPECT_EQ(it->token, 1u); EXPECT_STREQ(it.entry().string, "hi!"); ++it; EXPECT_EQ(it->token, 2u); EXPECT_STREQ(it.entry().string, "goodbye"); EXPECT_EQ(it - kBasicDatabase.begin(), 1); ++it; EXPECT_EQ(it->token, 0xFFu); EXPECT_STREQ(it.entry().string, ":)"); EXPECT_EQ(it - kBasicDatabase.begin(), 2); ++it; EXPECT_EQ(it, kBasicDatabase.end()); EXPECT_EQ(static_cast(it - kBasicDatabase.begin()), kBasicDatabase.size()); } TEST(TokenDatabase, Iterator_PreIncrement) { auto it = kBasicDatabase.begin(); EXPECT_EQ((++it)->token, 2u); EXPECT_STREQ(it.entry().string, "goodbye"); } TEST(TokenDatabase, Iterator_PostIncrement) { auto it = kBasicDatabase.begin(); EXPECT_EQ((it++)->token, 1u); EXPECT_EQ(it->token, 2u); EXPECT_STREQ(it.entry().string, "goodbye"); } TEST(TokenDatabase, SingleEntryLookup_FirstEntry) { auto match = kBasicDatabase.Find(1); ASSERT_EQ(match.size(), 1u); EXPECT_FALSE(match.empty()); EXPECT_STREQ(match[0].string, "hi!"); for (const auto& entry : match) { EXPECT_EQ(entry.token, 1u); EXPECT_STREQ(entry.string, "hi!"); } } TEST(TokenDatabase, SingleEntryLookup_MiddleEntry) { auto match = kBasicDatabase.Find(2); ASSERT_EQ(match.size(), 1u); EXPECT_FALSE(match.empty()); EXPECT_STREQ(match[0].string, "goodbye"); } TEST(TokenDatabase, SingleEntryLookup_LastEntry) { auto match = kBasicDatabase.Find(0xff); ASSERT_EQ(match.size(), 1u); EXPECT_STREQ(match[0].string, ":)"); EXPECT_FALSE(match.empty()); } TEST(TokenDatabase, SingleEntryLookup_NonPresent) { EXPECT_TRUE(kBasicDatabase.Find(0).empty()); EXPECT_TRUE(kBasicDatabase.Find(3).empty()); EXPECT_TRUE(kBasicDatabase.Find(10239).empty()); EXPECT_TRUE(kBasicDatabase.Find(0xFFFFFFFFu).empty()); } TEST(TokenDatabase, SingleEntryLookup_NoMatches) { // Can also create the database at runtime. TokenDatabase tokens = TokenDatabase::Create(kBasicData); const auto match = tokens.Find(42); ASSERT_EQ(match.size(), 0u); EXPECT_TRUE(match.empty()); for (const auto& entry : match) { FAIL(); // There were no matches, so this code should never execute. static_cast(entry); } } alignas(TokenDatabase::RawEntry) constexpr char kCollisionsData[] = "TOKENS\0\0\x05\0\0\0\0\0\0\0" "\x01\0\0\0date" "\x01\0\0\0date" "\x01\0\0\0date" "\x02\0\0\0date" "\xFF\0\0\0date" "hi!\0goodbye\0:)\0\0"; constexpr TokenDatabase kCollisions = TokenDatabase::Create(); static_assert(kCollisions.size() == 5u); TEST(TokenDatabase, MultipleEntriesWithSameToken) { TokenDatabase::Entries match = kCollisions.Find(1); EXPECT_EQ(match.begin()->token, 1u); EXPECT_EQ(match.end()->token, 2u); ASSERT_EQ(match.size(), 3u); EXPECT_STREQ(match[0].string, "hi!"); EXPECT_STREQ(match[1].string, "goodbye"); EXPECT_STREQ(match[2].string, ":)"); for (const auto& entry : match) { EXPECT_EQ(entry.token, 1u); } } TEST(TokenDatabase, Empty) { constexpr TokenDatabase empty_db = TokenDatabase::Create(); static_assert(empty_db.size() == 0u); static_assert(empty_db.ok()); EXPECT_TRUE(empty_db.Find(0).empty()); EXPECT_TRUE(empty_db.Find(123).empty()); for (const auto& entry : empty_db) { FAIL(); // The database is empty; this should never execute. static_cast(entry); } } TEST(TokenDatabase, NullDatabase) { constexpr TokenDatabase empty_db; static_assert(empty_db.size() == 0u); static_assert(!empty_db.ok()); EXPECT_TRUE(empty_db.Find(0).empty()); } TEST(TokenDatabase, InvalidData) { constexpr TokenDatabase bad_db = TokenDatabase::Create("TOKENS\0\0"); static_assert(!bad_db.ok()); EXPECT_TRUE(bad_db.Find(0).empty()); } TEST(TokenDatabase, FromString) { TokenDatabase bad_db = TokenDatabase::Create(std::string("wow!")); EXPECT_FALSE(bad_db.ok()); } } // namespace } // namespace pw::tokenizer