aboutsummaryrefslogtreecommitdiff
path: root/binary_data_histogram_unittest.cc
diff options
context:
space:
mode:
Diffstat (limited to 'binary_data_histogram_unittest.cc')
-rw-r--r--binary_data_histogram_unittest.cc132
1 files changed, 132 insertions, 0 deletions
diff --git a/binary_data_histogram_unittest.cc b/binary_data_histogram_unittest.cc
new file mode 100644
index 0000000..ca71010
--- /dev/null
+++ b/binary_data_histogram_unittest.cc
@@ -0,0 +1,132 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/zucchini/binary_data_histogram.h"
+
+#include <stddef.h>
+
+#include <memory>
+#include <vector>
+
+#include "components/zucchini/buffer_view.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace zucchini {
+
+TEST(OutlierDetectorTest, Basic) {
+ auto make_detector = [](const std::vector<double>& values) {
+ auto detector = std::make_unique<OutlierDetector>();
+ for (double v : values)
+ detector->Add(v);
+ detector->Prepare();
+ return detector;
+ };
+
+ std::unique_ptr<OutlierDetector> detector;
+ // No data: Should at least not cause error.
+ detector = make_detector({});
+ EXPECT_EQ(0, detector->DecideOutlier(0.0));
+ // Single point: Trivially inert.
+ detector = make_detector({0.5});
+ EXPECT_EQ(0, detector->DecideOutlier(0.1));
+ EXPECT_EQ(0, detector->DecideOutlier(0.5));
+ EXPECT_EQ(0, detector->DecideOutlier(0.9));
+ // Two identical points: StdDev is 0, so falls back to built-in tolerance.
+ detector = make_detector({0.5, 0.5});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.499));
+ EXPECT_EQ(0, detector->DecideOutlier(0.5));
+ EXPECT_EQ(0, detector->DecideOutlier(0.501));
+ EXPECT_EQ(1, detector->DecideOutlier(0.7));
+ // Two separate points: Outliner test is pretty lax.
+ detector = make_detector({0.4, 0.6});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.2));
+ EXPECT_EQ(0, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.5));
+ EXPECT_EQ(0, detector->DecideOutlier(0.7));
+ EXPECT_EQ(1, detector->DecideOutlier(0.8));
+ // Sharpen distribution by clustering toward norm: Now test is stricter.
+ detector = make_detector({0.4, 0.47, 0.48, 0.49, 0.50, 0.51, 0.52, 0.6});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.4));
+ EXPECT_EQ(0, detector->DecideOutlier(0.5));
+ EXPECT_EQ(0, detector->DecideOutlier(0.6));
+ EXPECT_EQ(1, detector->DecideOutlier(0.7));
+ // Shift numbers around: Mean is 0.3, and data order scrambled.
+ detector = make_detector({0.28, 0.2, 0.31, 0.4, 0.29, 0.32, 0.27, 0.30});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.0));
+ EXPECT_EQ(-1, detector->DecideOutlier(0.1));
+ EXPECT_EQ(0, detector->DecideOutlier(0.2));
+ EXPECT_EQ(0, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.4));
+ EXPECT_EQ(1, detector->DecideOutlier(0.5));
+ EXPECT_EQ(1, detector->DecideOutlier(1.0));
+ // Typical usage: Potential outlier would be part of original input data!
+ detector = make_detector({0.3, 0.29, 0.31, 0.0, 0.3, 0.32, 0.3, 0.29, 0.6});
+ EXPECT_EQ(-1, detector->DecideOutlier(0.0));
+ EXPECT_EQ(0, detector->DecideOutlier(0.28));
+ EXPECT_EQ(0, detector->DecideOutlier(0.29));
+ EXPECT_EQ(0, detector->DecideOutlier(0.3));
+ EXPECT_EQ(0, detector->DecideOutlier(0.31));
+ EXPECT_EQ(0, detector->DecideOutlier(0.32));
+ EXPECT_EQ(1, detector->DecideOutlier(0.6));
+}
+
+TEST(BinaryDataHistogramTest, Basic) {
+ constexpr double kUninitScore = -1;
+
+ constexpr uint8_t kTestData[] = {2, 137, 42, 0, 0, 0, 7, 11, 1, 11, 255};
+ const size_t n = sizeof(kTestData);
+ ConstBufferView region(kTestData, n);
+
+ std::vector<BinaryDataHistogram> prefix_histograms(n + 1); // Short to long.
+ std::vector<BinaryDataHistogram> suffix_histograms(n + 1); // Long to short.
+
+ for (size_t i = 0; i <= n; ++i) {
+ ConstBufferView prefix(region.begin(), i);
+ ConstBufferView suffix(region.begin() + i, n - i);
+ // If regions are smaller than 2 bytes then it is invalid. Else valid.
+ EXPECT_EQ(prefix.size() >= 2, prefix_histograms[i].Compute(prefix));
+ EXPECT_EQ(suffix.size() >= 2, suffix_histograms[i].Compute(suffix));
+ // IsValid() returns the same results.
+ EXPECT_EQ(prefix.size() >= 2, prefix_histograms[i].IsValid());
+ EXPECT_EQ(suffix.size() >= 2, suffix_histograms[i].IsValid());
+ }
+
+ // Full-prefix = full-suffix = full data.
+ EXPECT_EQ(0.0, prefix_histograms[n].Distance(suffix_histograms[0]));
+ EXPECT_EQ(0.0, suffix_histograms[0].Distance(prefix_histograms[n]));
+
+ // Testing heuristics without overreliance on implementation details.
+
+ // Strict prefixes, in increasing size. Compare against full data.
+ double prev_prefix_score = kUninitScore;
+ for (size_t i = 2; i < n; ++i) {
+ double score = prefix_histograms[i].Distance(prefix_histograms[n]);
+ // Positivity.
+ EXPECT_GT(score, 0.0);
+ // Symmetry.
+ EXPECT_EQ(score, prefix_histograms[n].Distance(prefix_histograms[i]));
+ // Distance should decrease as prefix gets nearer to full data.
+ if (prev_prefix_score != kUninitScore)
+ EXPECT_LT(score, prev_prefix_score);
+ prev_prefix_score = score;
+ }
+
+ // Strict suffixes, in decreasing size. Compare against full data.
+ double prev_suffix_score = -1;
+ for (size_t i = 1; i <= n - 2; ++i) {
+ double score = suffix_histograms[i].Distance(suffix_histograms[0]);
+ // Positivity.
+ EXPECT_GT(score, 0.0);
+ // Symmetry.
+ EXPECT_EQ(score, suffix_histograms[0].Distance(suffix_histograms[i]));
+ // Distance should increase as suffix gets farther from full data.
+ if (prev_suffix_score != kUninitScore)
+ EXPECT_GT(score, prev_suffix_score);
+ prev_suffix_score = score;
+ }
+}
+
+} // namespace zucchini