aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorlowasser <lowasser@google.com>2019-03-04 14:45:53 -0800
committerColin Decker <cgdecker@google.com>2019-03-07 15:41:04 -0500
commitdaed909a0b4e1e6039441077357793c1a3ea7913 (patch)
tree616aca2e425667300138fe647e513663a99f837a
parentc37130733fd791487a5c01cb0f3e9b2b4786534c (diff)
downloadguava-daed909a0b4e1e6039441077357793c1a3ea7913.tar.gz
Optimize ImmutableSet's hash flooding detection, using an algorithm which allows more false positives (though we calibrate the constant factors to compensate) but runs O(log n) times faster on average.
RELNOTES=n/a ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=236729050
-rw-r--r--guava-tests/benchmark/com/google/common/collect/ImmutableSetHashFloodingDetectionBenchmark.java158
-rw-r--r--guava/src/com/google/common/collect/ImmutableSet.java37
2 files changed, 185 insertions, 10 deletions
diff --git a/guava-tests/benchmark/com/google/common/collect/ImmutableSetHashFloodingDetectionBenchmark.java b/guava-tests/benchmark/com/google/common/collect/ImmutableSetHashFloodingDetectionBenchmark.java
new file mode 100644
index 000000000..9e45916b4
--- /dev/null
+++ b/guava-tests/benchmark/com/google/common/collect/ImmutableSetHashFloodingDetectionBenchmark.java
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2019 The Guava Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.common.collect;
+
+import com.google.caliper.BeforeExperiment;
+import com.google.caliper.Benchmark;
+import com.google.caliper.Param;
+import com.google.common.math.IntMath;
+import java.math.RoundingMode;
+
+/** Benchmark of implementations of {@link ImmutableSet#hashFloodingDetected(Object[])}. */
+public class ImmutableSetHashFloodingDetectionBenchmark {
+ private static final int TEST_CASES = 0x100;
+
+ @Param({"10", "100", "1000", "10000"})
+ int size;
+
+ @Param Impl impl;
+
+ private static final Object[][] tables = new Object[TEST_CASES][];
+
+ @BeforeExperiment
+ public void setUp() {
+ int tableSize = ImmutableSet.chooseTableSize(size);
+ int mask = tableSize - 1;
+ for (int i = 0; i < TEST_CASES; i++) {
+ tables[i] = new Object[tableSize];
+ for (int j = 0; j < size; j++) {
+ Object o = new Object();
+ for (int k = o.hashCode(); ; k++) {
+ int index = k & mask;
+ if (tables[i][index] == null) {
+ tables[i][index] = o;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ enum Impl {
+ EXHAUSTIVE {
+ int maxRunBeforeFallback(int tableSize) {
+ return 12 * IntMath.log2(tableSize, RoundingMode.UNNECESSARY);
+ }
+
+ @Override
+ boolean hashFloodingDetected(Object[] hashTable) {
+ int maxRunBeforeFallback = maxRunBeforeFallback(hashTable.length);
+
+ // Test for a run wrapping around the end of the table, then check for runs in the middle.
+ int endOfStartRun;
+ for (endOfStartRun = 0; endOfStartRun < hashTable.length; ) {
+ if (hashTable[endOfStartRun] == null) {
+ break;
+ }
+ endOfStartRun++;
+ if (endOfStartRun > maxRunBeforeFallback) {
+ return true;
+ }
+ }
+ int startOfEndRun;
+ for (startOfEndRun = hashTable.length - 1; startOfEndRun > endOfStartRun; startOfEndRun--) {
+ if (hashTable[startOfEndRun] == null) {
+ break;
+ }
+ if (endOfStartRun + (hashTable.length - 1 - startOfEndRun) > maxRunBeforeFallback) {
+ return true;
+ }
+ }
+ for (int i = endOfStartRun + 1; i < startOfEndRun; i++) {
+ for (int runLength = 0; i < startOfEndRun && hashTable[i] != null; i++) {
+ runLength++;
+ if (runLength > maxRunBeforeFallback) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+ },
+ SEPARATE_RANGES {
+ int maxRunBeforeFallback(int tableSize) {
+ return 13 * IntMath.log2(tableSize, RoundingMode.UNNECESSARY);
+ }
+
+ @Override
+ boolean hashFloodingDetected(Object[] hashTable) {
+ int maxRunBeforeFallback = maxRunBeforeFallback(hashTable.length);
+
+ // Test for a run wrapping around the end of the table, then check for runs in the middle.
+ int endOfStartRun;
+ for (endOfStartRun = 0; endOfStartRun < hashTable.length; ) {
+ if (hashTable[endOfStartRun] == null) {
+ break;
+ }
+ endOfStartRun++;
+ if (endOfStartRun > maxRunBeforeFallback) {
+ return true;
+ }
+ }
+ int startOfEndRun;
+ for (startOfEndRun = hashTable.length - 1; startOfEndRun > endOfStartRun; startOfEndRun--) {
+ if (hashTable[startOfEndRun] == null) {
+ break;
+ }
+ if (endOfStartRun + (hashTable.length - 1 - startOfEndRun) > maxRunBeforeFallback) {
+ return true;
+ }
+ }
+
+ // If this part returns true, there is definitely a run of size maxRunBeforeFallback/2.
+ // If this part returns false, there are definitely no runs of size >= maxRunBeforeFallback.
+ int testBlockSize = maxRunBeforeFallback / 2;
+ for (int i = endOfStartRun + 1; i + testBlockSize <= startOfEndRun; i += testBlockSize) {
+ boolean runGood = false;
+ for (int j = 0; j < testBlockSize; j++) {
+ if (hashTable[i + j] == null) {
+ runGood = true;
+ break;
+ }
+ }
+ if (!runGood) {
+ return true;
+ }
+ }
+ return false;
+ }
+ };
+
+ abstract boolean hashFloodingDetected(Object[] array);
+ }
+
+ @Benchmark
+ public int detect(int reps) {
+ int count = 0;
+ for (int i = 0; i < reps; i++) {
+ if (impl.hashFloodingDetected(tables[i & 0xFF])) {
+ count++;
+ }
+ }
+ return count;
+ }
+}
diff --git a/guava/src/com/google/common/collect/ImmutableSet.java b/guava/src/com/google/common/collect/ImmutableSet.java
index 6e870c3e6..cd55c8b86 100644
--- a/guava/src/com/google/common/collect/ImmutableSet.java
+++ b/guava/src/com/google/common/collect/ImmutableSet.java
@@ -637,10 +637,12 @@ public abstract class ImmutableSet<E> extends ImmutableCollection<E> implements
static final double HASH_FLOODING_FPP = 0.001;
// NB: yes, this is surprisingly high, but that's what the experiments said was necessary
- static final int MAX_RUN_MULTIPLIER = 12;
+ // The higher it is, the worse constant factors we are willing to accept.
+ static final int MAX_RUN_MULTIPLIER = 13;
/**
- * Checks the whole hash table for poor hash distribution. Takes O(n).
+ * Checks the whole hash table for poor hash distribution. Takes O(n) in the worst case, O(n / log
+ * n) on average.
*
* <p>The online hash flooding detecting in RegularSetBuilderImpl.add can detect e.g. many exactly
* matching hash codes, which would cause construction to take O(n^2), but can't detect e.g. hash
@@ -652,11 +654,20 @@ public abstract class ImmutableSet<E> extends ImmutableCollection<E> implements
* <p>Note that for a RegularImmutableSet with elements with truly random hash codes, contains
* operations take expected O(1) time but with high probability take O(log n) for at least some
* element. (https://en.wikipedia.org/wiki/Linear_probing#Analysis)
+ *
+ * <p>This method may return {@code true} up to {@link #HASH_FLOODING_FPP} of the time even on
+ * truly random input.
+ *
+ * <p>If this method returns false, there are definitely no runs of length at least {@code
+ * maxRunBeforeFallback(hashTable.length)} nonnull elements. If there are no runs of length at
+ * least {@code maxRunBeforeFallback(hashTable.length) / 2} nonnull elements, this method
+ * definitely returns false. In between those constraints, the result of this method is undefined,
+ * subject to the above {@link #HASH_FLOODING_FPP} constraint.
*/
static boolean hashFloodingDetected(Object[] hashTable) {
int maxRunBeforeFallback = maxRunBeforeFallback(hashTable.length);
- // Test for a run wrapping around the end of the table, then check for runs in the middle.
+ // Test for a run wrapping around the end of the table of length at least maxRunBeforeFallback.
int endOfStartRun;
for (endOfStartRun = 0; endOfStartRun < hashTable.length; ) {
if (hashTable[endOfStartRun] == null) {
@@ -676,22 +687,28 @@ public abstract class ImmutableSet<E> extends ImmutableCollection<E> implements
return true;
}
}
- for (int i = endOfStartRun + 1; i < startOfEndRun; i++) {
- for (int runLength = 0; i < startOfEndRun && hashTable[i] != null; i++) {
- runLength++;
- if (runLength > maxRunBeforeFallback) {
- return true;
+
+ // Now, break the remainder of the table into blocks of maxRunBeforeFallback/2 elements and
+ // check that each has at least one null.
+ int testBlockSize = maxRunBeforeFallback / 2;
+ blockLoop:
+ for (int i = endOfStartRun + 1; i + testBlockSize <= startOfEndRun; i += testBlockSize) {
+ for (int j = 0; j < testBlockSize; j++) {
+ if (hashTable[i + j] == null) {
+ continue blockLoop;
}
}
+ return true;
}
return false;
}
/**
* If more than this many consecutive positions are filled in a table of the specified size,
- * report probable hash flooding.
+ * report probable hash flooding. ({@link #hashFloodingDetected} may also report hash flooding if
+ * fewer consecutive positions are filled; see that method for details.)
*/
- static int maxRunBeforeFallback(int tableSize) {
+ private static int maxRunBeforeFallback(int tableSize) {
return MAX_RUN_MULTIPLIER * IntMath.log2(tableSize, RoundingMode.UNNECESSARY);
}