diff options
author | Alexei Frolov <frolv@google.com> | 2019-11-27 14:38:39 -0800 |
---|---|---|
committer | Alexei Frolov <frolv@google.com> | 2019-12-02 18:39:29 +0000 |
commit | 82d3cb35d37f9f4011a47f0b33acf55fd2e3f8c7 (patch) | |
tree | 08d6a42c30760a5644d7b9550dfbf177303d32ad /pw_varint | |
parent | af744f59aa1767b2266a6521991d8e2ab25c6861 (diff) | |
download | pigweed-82d3cb35d37f9f4011a47f0b33acf55fd2e3f8c7.tar.gz |
pw_varint: Add varint module
This change adds a pw_varint module containing functions for encoding
and decoding variable-length integers.
Change-Id: I50bdf6d9d6762bffb93ee638683de53afed9c849
Diffstat (limited to 'pw_varint')
-rw-r--r-- | pw_varint/BUILD.gn | 49 | ||||
-rw-r--r-- | pw_varint/README.md | 1 | ||||
-rw-r--r-- | pw_varint/public/pw_varint/varint.h | 90 | ||||
-rw-r--r-- | pw_varint/varint.cc | 77 | ||||
-rw-r--r-- | pw_varint/varint_test.cc | 304 |
5 files changed, 521 insertions, 0 deletions
diff --git a/pw_varint/BUILD.gn b/pw_varint/BUILD.gn new file mode 100644 index 000000000..80af01932 --- /dev/null +++ b/pw_varint/BUILD.gn @@ -0,0 +1,49 @@ +# Copyright 2019 The Pigweed Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +import("$dir_pw_unit_test/test.gni") + +config("default_config") { + include_dirs = [ "public" ] +} + +source_set("pw_varint") { + public_configs = [ + "$dir_pw_build:pw_default_cpp", + ":default_config", + ] + public_deps = [ + "$dir_pw_span", + ] + sources = [ + "public/pw_varint/varint.h", + "varint.cc", + ] + public = [ + "public/pw_varint/varint.h", + ] +} + +pw_test_group("tests") { + tests = [ ":varint_test" ] +} + +pw_test("varint_test") { + deps = [ + ":pw_varint", + ] + sources = [ + "varint_test.cc", + ] +} diff --git a/pw_varint/README.md b/pw_varint/README.md new file mode 100644 index 000000000..07e989db6 --- /dev/null +++ b/pw_varint/README.md @@ -0,0 +1 @@ +# pw\_varint: Variable-length integer encoding diff --git a/pw_varint/public/pw_varint/varint.h b/pw_varint/public/pw_varint/varint.h new file mode 100644 index 000000000..0e0b72d85 --- /dev/null +++ b/pw_varint/public/pw_varint/varint.h @@ -0,0 +1,90 @@ +// Copyright 2019 The Pigweed Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +#pragma once + +#include <cstddef> +#include <cstdint> +#include <type_traits> + +#include "pw_span/span.h" + +namespace pw::varint { + +// The maximum number of bytes occupied by an encoded varint. The maximum +// uint64_t occupies 10 bytes when encoded. +inline constexpr size_t kMaxVarintSizeBytes = 10; + +// ZigZag encodes a signed integer. This maps small negative numbers to small, +// unsigned positive numbers, which improves their density for LEB128 encoding. +// +// ZigZag encoding works by moving the sign bit from the most-significant bit to +// the least-significant bit. For the signed k-bit integer n, the formula is +// +// (n << 1) ^ (n >> (k - 1)) +// +// See the following for a description of ZigZag encoding: +// https://developers.google.com/protocol-buffers/docs/encoding#types +template <typename T> +constexpr std::make_unsigned_t<T> ZigZagEncode(T n) { + static_assert(std::is_signed<T>(), "Zig-zag encoding is for signed integers"); + using U = std::make_unsigned_t<T>; + return (static_cast<U>(n) << 1) ^ static_cast<U>(n >> (sizeof(T) * 8 - 1)); +} + +// Encodes a uint64_t with Little-Endian Base 128 (LEB128) encoding. +size_t EncodeLittleEndianBase128(uint64_t integer, const span<uint8_t>& output); + +// Encodes the provided integer using a variable-length encoding and returns the +// number of bytes written. +// +// The encoding is the same as used in protocol buffers. Signed integers are +// ZigZag encoded to remove leading 1s from small negative numbers, then the +// resulting number is encoded as Little Endian Base 128 (LEB128). Unsigned +// integers are encoded directly as LEB128. +// +// Returns the number of bytes written or 0 if the result didn't fit in the +// encoding buffer. +template <typename T> +size_t EncodeVarint(T integer, const span<uint8_t>& output) { + if constexpr (std::is_signed<T>()) { + return EncodeLittleEndianBase128(ZigZagEncode(integer), output); + } else { + return EncodeLittleEndianBase128(integer, output); + } +} + +// Decodes a varint-encoded value. If reading into a signed integer, the value +// is ZigZag decoded. +// +// Returns the number of bytes read from the input if successful. Returns zero +// if the result does not fit in a int64_t / uint64_t or if the input is +// exhausted before the number terminates. Reads a maximum of 10 bytes. +// +// The following example decodes multiple varints from a buffer: +// +// while (!data.empty()) { +// int64_t value; +// size_t bytes = DecodeVarint(data, &value); +// +// if (bytes == 0u) { +// return Status::DATA_LOSS; +// } +// results.push_back(value); +// data = data.subspan(bytes) +// } +// +size_t DecodeVarint(const span<const uint8_t>& input, int64_t* value); +size_t DecodeVarint(const span<const uint8_t>& input, uint64_t* value); + +} // namespace pw::varint diff --git a/pw_varint/varint.cc b/pw_varint/varint.cc new file mode 100644 index 000000000..3d0e759d7 --- /dev/null +++ b/pw_varint/varint.cc @@ -0,0 +1,77 @@ +// Copyright 2019 The Pigweed Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. + +#include "pw_varint/varint.h" + +#include <algorithm> + +namespace pw::varint { +namespace { + +constexpr int64_t ZigZagDecode64(uint64_t n) { + return static_cast<int64_t>((n >> 1) ^ (~(n & 1) + 1)); +} + +} // namespace + +size_t EncodeLittleEndianBase128(uint64_t integer, + const span<uint8_t>& output) { + size_t written = 0; + do { + if (written >= output.size()) { + return 0; + } + + // Grab 7 bits; the eighth bit is set to 1 to indicate more data coming. + output[written++] = static_cast<uint8_t>(integer) | '\x80'; + integer >>= 7; + } while (integer != 0u); + + output[written - 1] &= '\x7f'; // clear the top bit of the last byte + return written; +} + +size_t DecodeVarint(const span<const uint8_t>& input, int64_t* value) { + const size_t bytes = DecodeVarint(input, reinterpret_cast<uint64_t*>(value)); + *value = ZigZagDecode64(*value); + return bytes; +} + +size_t DecodeVarint(const span<const uint8_t>& input, uint64_t* value) { + uint64_t decoded_value = 0; + uint_fast8_t count = 0; + + // The largest 64-bit ints require 10 B. + const size_t max_count = std::min(kMaxVarintSizeBytes, input.size()); + + while (true) { + if (count >= max_count) { + return 0; + } + + // Add the bottom seven bits of the next byte to the result. + decoded_value |= static_cast<uint64_t>(input[count] & '\x7f') + << (7 * count); + + // Stop decoding if the top bit is not set. + if ((input[count++] & '\x80') == 0) { + break; + } + } + + *value = decoded_value; + return count; +} + +} // namespace pw::varint diff --git a/pw_varint/varint_test.cc b/pw_varint/varint_test.cc new file mode 100644 index 000000000..99bacde04 --- /dev/null +++ b/pw_varint/varint_test.cc @@ -0,0 +1,304 @@ +// Copyright 2019 The Pigweed Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. + +#include "pw_varint/varint.h" + +#include <cinttypes> +#include <cstdint> +#include <cstring> +#include <limits> + +#include "gtest/gtest.h" + +namespace pw::varint { +namespace { + +class Varint : public ::testing::Test { + protected: + Varint() : buffer_{'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'} {} + uint8_t buffer_[10]; +}; + +TEST_F(Varint, EncodeSizeUnsigned32_SmallSingleByte) { + ASSERT_EQ(1u, EncodeVarint(UINT32_C(0), buffer_)); + EXPECT_EQ(0u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(UINT32_C(1), buffer_)); + EXPECT_EQ(1u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(UINT32_C(2), buffer_)); + EXPECT_EQ(2u, buffer_[0]); +} + +TEST_F(Varint, EncodeSizeUnsigned32_LargeSingleByte) { + ASSERT_EQ(1u, EncodeVarint(UINT32_C(63), buffer_)); + EXPECT_EQ(63u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(UINT32_C(64), buffer_)); + EXPECT_EQ(64u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(UINT32_C(126), buffer_)); + EXPECT_EQ(126u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(UINT32_C(127), buffer_)); + EXPECT_EQ(127u, buffer_[0]); +} + +TEST_F(Varint, EncodeSizeUnsigned32_MultiByte) { + ASSERT_EQ(2u, EncodeVarint(UINT32_C(128), buffer_)); + EXPECT_EQ(std::memcmp("\x80\x01", buffer_, 2), 0); + ASSERT_EQ(2u, EncodeVarint(UINT32_C(129), buffer_)); + EXPECT_EQ(std::memcmp("\x81\x01", buffer_, 2), 0); + + ASSERT_EQ(5u, + EncodeVarint(std::numeric_limits<uint32_t>::max() - 1, buffer_)); + EXPECT_EQ(std::memcmp("\xfe\xff\xff\xff\x0f", buffer_, 5), 0); + + ASSERT_EQ(5u, EncodeVarint(std::numeric_limits<uint32_t>::max(), buffer_)); + EXPECT_EQ(std::memcmp("\xff\xff\xff\xff\x0f", buffer_, 5), 0); +} + +TEST_F(Varint, EncodeSizeSigned32_SmallSingleByte) { + ASSERT_EQ(1u, EncodeVarint(INT32_C(0), buffer_)); + EXPECT_EQ(0u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(INT32_C(-1), buffer_)); + EXPECT_EQ(1u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(INT32_C(1), buffer_)); + EXPECT_EQ(2u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(INT32_C(-2), buffer_)); + EXPECT_EQ(3u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(INT32_C(2), buffer_)); + EXPECT_EQ(4u, buffer_[0]); +} + +TEST_F(Varint, EncodeSizeSigned32_LargeSingleByte) { + ASSERT_EQ(1u, EncodeVarint(INT32_C(-63), buffer_)); + EXPECT_EQ(125u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(INT32_C(63), buffer_)); + EXPECT_EQ(126u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(INT32_C(-64), buffer_)); + EXPECT_EQ(127u, buffer_[0]); +} + +TEST_F(Varint, EncodeSizeSigned32_MultiByte) { + ASSERT_EQ(2u, EncodeVarint(INT32_C(64), buffer_)); + EXPECT_EQ(std::memcmp("\x80\x01", buffer_, 2), 0); + ASSERT_EQ(2u, EncodeVarint(INT32_C(-65), buffer_)); + EXPECT_EQ(std::memcmp("\x81\x01", buffer_, 2), 0); + ASSERT_EQ(2u, EncodeVarint(INT32_C(65), buffer_)); + EXPECT_EQ(std::memcmp("\x82\x01", buffer_, 2), 0); + + ASSERT_EQ(5u, EncodeVarint(std::numeric_limits<int32_t>::min(), buffer_)); + EXPECT_EQ(std::memcmp("\xff\xff\xff\xff\x0f", buffer_, 5), 0); + + ASSERT_EQ(5u, EncodeVarint(std::numeric_limits<int32_t>::max(), buffer_)); + EXPECT_EQ(std::memcmp("\xfe\xff\xff\xff\x0f", buffer_, 5), 0); +} + +TEST_F(Varint, EncodeSizeUnsigned64_SmallSingleByte) { + ASSERT_EQ(1u, EncodeVarint(UINT64_C(0), buffer_)); + EXPECT_EQ(0u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(UINT64_C(1), buffer_)); + EXPECT_EQ(1u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(UINT64_C(2), buffer_)); + EXPECT_EQ(2u, buffer_[0]); +} + +TEST_F(Varint, EncodeSizeUnsigned64_LargeSingleByte) { + ASSERT_EQ(1u, EncodeVarint(UINT64_C(63), buffer_)); + EXPECT_EQ(63u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(UINT64_C(64), buffer_)); + EXPECT_EQ(64u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(UINT64_C(126), buffer_)); + EXPECT_EQ(126u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(UINT64_C(127), buffer_)); + EXPECT_EQ(127u, buffer_[0]); +} + +TEST_F(Varint, EncodeSizeUnsigned64_MultiByte) { + ASSERT_EQ(2u, EncodeVarint(UINT64_C(128), buffer_)); + EXPECT_EQ(std::memcmp("\x80\x01", buffer_, 2), 0); + ASSERT_EQ(2u, EncodeVarint(UINT64_C(129), buffer_)); + EXPECT_EQ(std::memcmp("\x81\x01", buffer_, 2), 0); + + ASSERT_EQ(5u, + EncodeVarint(std::numeric_limits<uint32_t>::max() - 1, buffer_)); + EXPECT_EQ(std::memcmp("\xfe\xff\xff\xff\x0f", buffer_, 5), 0); + + ASSERT_EQ(5u, EncodeVarint(std::numeric_limits<uint32_t>::max(), buffer_)); + EXPECT_EQ(std::memcmp("\xff\xff\xff\xff\x0f", buffer_, 5), 0); + + ASSERT_EQ(10u, + EncodeVarint(std::numeric_limits<uint64_t>::max() - 1, buffer_)); + EXPECT_EQ( + std::memcmp("\xfe\xff\xff\xff\xff\xff\xff\xff\xff\x01", buffer_, 10), 0); + + ASSERT_EQ(10u, EncodeVarint(std::numeric_limits<uint64_t>::max(), buffer_)); + EXPECT_EQ( + std::memcmp("\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01", buffer_, 10), 0); +} + +TEST_F(Varint, EncodeSizeSigned64_SmallSingleByte) { + ASSERT_EQ(1u, EncodeVarint(INT64_C(0), buffer_)); + EXPECT_EQ(0u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(INT64_C(-1), buffer_)); + EXPECT_EQ(1u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(INT64_C(1), buffer_)); + EXPECT_EQ(2u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(INT64_C(-2), buffer_)); + EXPECT_EQ(3u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(INT64_C(2), buffer_)); + EXPECT_EQ(4u, buffer_[0]); +} + +TEST_F(Varint, EncodeSizeSigned64_LargeSingleByte) { + ASSERT_EQ(1u, EncodeVarint(INT64_C(-63), buffer_)); + EXPECT_EQ(125u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(INT64_C(63), buffer_)); + EXPECT_EQ(126u, buffer_[0]); + ASSERT_EQ(1u, EncodeVarint(INT64_C(-64), buffer_)); + EXPECT_EQ(127u, buffer_[0]); +} + +TEST_F(Varint, EncodeSizeSigned64_MultiByte) { + ASSERT_EQ(2u, EncodeVarint(INT64_C(64), buffer_)); + EXPECT_EQ(std::memcmp("\x80\x01", buffer_, 2), 0); + ASSERT_EQ(2u, EncodeVarint(INT64_C(-65), buffer_)); + EXPECT_EQ(std::memcmp("\x81\x01", buffer_, 2), 0); + ASSERT_EQ(2u, EncodeVarint(INT64_C(65), buffer_)); + EXPECT_EQ(std::memcmp("\x82\x01", buffer_, 2), 0); + + ASSERT_EQ( + 5u, + EncodeVarint(static_cast<int64_t>(std::numeric_limits<int32_t>::min()), + buffer_)); + EXPECT_EQ(std::memcmp("\xff\xff\xff\xff\x0f", buffer_, 5), 0); + + ASSERT_EQ( + 5u, + EncodeVarint(static_cast<int64_t>(std::numeric_limits<int32_t>::max()), + buffer_)); + EXPECT_EQ(std::memcmp("\xfe\xff\xff\xff\x0f", buffer_, 5), 0); + + ASSERT_EQ(10u, EncodeVarint(std::numeric_limits<int64_t>::min(), buffer_)); + EXPECT_EQ( + std::memcmp("\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01", buffer_, 10), 0); + + ASSERT_EQ(10u, EncodeVarint(std::numeric_limits<int64_t>::max(), buffer_)); + EXPECT_EQ( + std::memcmp("\xfe\xff\xff\xff\xff\xff\xff\xff\xff\x01", buffer_, 10), 0); +} + +TEST_F(Varint, EncodeDecodeSigned32) { + // Set the increment to 1 to test every number (this is slow) + static constexpr int kIncrement = 1'000'009; + + int32_t i = std::numeric_limits<int32_t>::min(); + while (true) { + size_t encoded = EncodeVarint(i, buffer_); + + int64_t result; + size_t decoded = DecodeVarint(buffer_, &result); + + EXPECT_EQ(encoded, decoded); + ASSERT_EQ(i, result); + + if (i > std::numeric_limits<int32_t>::max() - kIncrement) { + break; + } + + i += kIncrement; + } +} + +TEST_F(Varint, EncodeDecodeUnsigned32) { + // Set the increment to 1 to test every number (this is slow) + static constexpr int kIncrement = 1'000'009; + + uint32_t i = 0; + while (true) { + size_t encoded = EncodeVarint(i, buffer_); + + uint64_t result; + size_t decoded = DecodeVarint(buffer_, &result); + + EXPECT_EQ(encoded, decoded); + ASSERT_EQ(i, result); + + if (i > std::numeric_limits<uint32_t>::max() - kIncrement) { + break; + } + + i += kIncrement; + } +} + +template <size_t kStringSize> +auto MakeBuffer(const char (&data)[kStringSize]) { + constexpr size_t kSizeBytes = kStringSize - 1; + static_assert(kSizeBytes <= 10, "Varint arrays never need be larger than 10"); + + std::array<uint8_t, kSizeBytes> array; + std::memcpy(array.data(), data, kSizeBytes); + return array; +} + +TEST(VarintDecode, DecodeSigned64_SingleByte) { + int64_t value = -1234; + + EXPECT_EQ(DecodeVarint(MakeBuffer("\x00"), &value), 1u); + EXPECT_EQ(value, 0); + + EXPECT_EQ(DecodeVarint(MakeBuffer("\x01"), &value), 1u); + EXPECT_EQ(value, -1); + + EXPECT_EQ(DecodeVarint(MakeBuffer("\x02"), &value), 1u); + EXPECT_EQ(value, 1); + + EXPECT_EQ(DecodeVarint(MakeBuffer("\x03"), &value), 1u); + EXPECT_EQ(value, -2); + + EXPECT_EQ(DecodeVarint(MakeBuffer("\x04"), &value), 1u); + EXPECT_EQ(value, 2); + + EXPECT_EQ(DecodeVarint(MakeBuffer("\x04"), &value), 1u); + EXPECT_EQ(value, 2); +} + +TEST(VarintDecode, DecodeSigned64_MultiByte) { + int64_t value = -1234; + + EXPECT_EQ(DecodeVarint(MakeBuffer("\x80\x01"), &value), 2u); + EXPECT_EQ(value, 64); + + EXPECT_EQ(DecodeVarint(MakeBuffer("\x81\x01"), &value), 2u); + EXPECT_EQ(value, -65); + + EXPECT_EQ(DecodeVarint(MakeBuffer("\x82\x01"), &value), 2u); + EXPECT_EQ(value, 65); + + EXPECT_EQ(DecodeVarint(MakeBuffer("\xff\xff\xff\xff\x0f"), &value), 5u); + EXPECT_EQ(value, std::numeric_limits<int32_t>::min()); + + EXPECT_EQ(DecodeVarint(MakeBuffer("\xfe\xff\xff\xff\x0f"), &value), 5u); + EXPECT_EQ(value, std::numeric_limits<int32_t>::max()); + + EXPECT_EQ(DecodeVarint(MakeBuffer("\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01"), + &value), + 10u); + EXPECT_EQ(value, std::numeric_limits<int64_t>::min()); + + EXPECT_EQ(DecodeVarint(MakeBuffer("\xfe\xff\xff\xff\xff\xff\xff\xff\xff\x01"), + &value), + 10u); + EXPECT_EQ(value, std::numeric_limits<int64_t>::max()); +} + +} // namespace +} // namespace pw::varint |