diff options
author | lowasser <lowasser@google.com> | 2019-03-04 14:45:53 -0800 |
---|---|---|
committer | Colin Decker <cgdecker@google.com> | 2019-03-07 15:41:04 -0500 |
commit | daed909a0b4e1e6039441077357793c1a3ea7913 (patch) | |
tree | 616aca2e425667300138fe647e513663a99f837a | |
parent | c37130733fd791487a5c01cb0f3e9b2b4786534c (diff) | |
download | guava-daed909a0b4e1e6039441077357793c1a3ea7913.tar.gz |
Optimize ImmutableSet's hash flooding detection, using an algorithm which allows more false positives (though we calibrate the constant factors to compensate) but runs O(log n) times faster on average.
RELNOTES=n/a
-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=236729050
-rw-r--r-- | guava-tests/benchmark/com/google/common/collect/ImmutableSetHashFloodingDetectionBenchmark.java | 158 | ||||
-rw-r--r-- | guava/src/com/google/common/collect/ImmutableSet.java | 37 |
2 files changed, 185 insertions, 10 deletions
diff --git a/guava-tests/benchmark/com/google/common/collect/ImmutableSetHashFloodingDetectionBenchmark.java b/guava-tests/benchmark/com/google/common/collect/ImmutableSetHashFloodingDetectionBenchmark.java new file mode 100644 index 000000000..9e45916b4 --- /dev/null +++ b/guava-tests/benchmark/com/google/common/collect/ImmutableSetHashFloodingDetectionBenchmark.java @@ -0,0 +1,158 @@ +/* + * Copyright (C) 2019 The Guava Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.common.collect; + +import com.google.caliper.BeforeExperiment; +import com.google.caliper.Benchmark; +import com.google.caliper.Param; +import com.google.common.math.IntMath; +import java.math.RoundingMode; + +/** Benchmark of implementations of {@link ImmutableSet#hashFloodingDetected(Object[])}. */ +public class ImmutableSetHashFloodingDetectionBenchmark { + private static final int TEST_CASES = 0x100; + + @Param({"10", "100", "1000", "10000"}) + int size; + + @Param Impl impl; + + private static final Object[][] tables = new Object[TEST_CASES][]; + + @BeforeExperiment + public void setUp() { + int tableSize = ImmutableSet.chooseTableSize(size); + int mask = tableSize - 1; + for (int i = 0; i < TEST_CASES; i++) { + tables[i] = new Object[tableSize]; + for (int j = 0; j < size; j++) { + Object o = new Object(); + for (int k = o.hashCode(); ; k++) { + int index = k & mask; + if (tables[i][index] == null) { + tables[i][index] = o; + break; + } + } + } + } + } + + enum Impl { + EXHAUSTIVE { + int maxRunBeforeFallback(int tableSize) { + return 12 * IntMath.log2(tableSize, RoundingMode.UNNECESSARY); + } + + @Override + boolean hashFloodingDetected(Object[] hashTable) { + int maxRunBeforeFallback = maxRunBeforeFallback(hashTable.length); + + // Test for a run wrapping around the end of the table, then check for runs in the middle. + int endOfStartRun; + for (endOfStartRun = 0; endOfStartRun < hashTable.length; ) { + if (hashTable[endOfStartRun] == null) { + break; + } + endOfStartRun++; + if (endOfStartRun > maxRunBeforeFallback) { + return true; + } + } + int startOfEndRun; + for (startOfEndRun = hashTable.length - 1; startOfEndRun > endOfStartRun; startOfEndRun--) { + if (hashTable[startOfEndRun] == null) { + break; + } + if (endOfStartRun + (hashTable.length - 1 - startOfEndRun) > maxRunBeforeFallback) { + return true; + } + } + for (int i = endOfStartRun + 1; i < startOfEndRun; i++) { + for (int runLength = 0; i < startOfEndRun && hashTable[i] != null; i++) { + runLength++; + if (runLength > maxRunBeforeFallback) { + return true; + } + } + } + return false; + } + }, + SEPARATE_RANGES { + int maxRunBeforeFallback(int tableSize) { + return 13 * IntMath.log2(tableSize, RoundingMode.UNNECESSARY); + } + + @Override + boolean hashFloodingDetected(Object[] hashTable) { + int maxRunBeforeFallback = maxRunBeforeFallback(hashTable.length); + + // Test for a run wrapping around the end of the table, then check for runs in the middle. + int endOfStartRun; + for (endOfStartRun = 0; endOfStartRun < hashTable.length; ) { + if (hashTable[endOfStartRun] == null) { + break; + } + endOfStartRun++; + if (endOfStartRun > maxRunBeforeFallback) { + return true; + } + } + int startOfEndRun; + for (startOfEndRun = hashTable.length - 1; startOfEndRun > endOfStartRun; startOfEndRun--) { + if (hashTable[startOfEndRun] == null) { + break; + } + if (endOfStartRun + (hashTable.length - 1 - startOfEndRun) > maxRunBeforeFallback) { + return true; + } + } + + // If this part returns true, there is definitely a run of size maxRunBeforeFallback/2. + // If this part returns false, there are definitely no runs of size >= maxRunBeforeFallback. + int testBlockSize = maxRunBeforeFallback / 2; + for (int i = endOfStartRun + 1; i + testBlockSize <= startOfEndRun; i += testBlockSize) { + boolean runGood = false; + for (int j = 0; j < testBlockSize; j++) { + if (hashTable[i + j] == null) { + runGood = true; + break; + } + } + if (!runGood) { + return true; + } + } + return false; + } + }; + + abstract boolean hashFloodingDetected(Object[] array); + } + + @Benchmark + public int detect(int reps) { + int count = 0; + for (int i = 0; i < reps; i++) { + if (impl.hashFloodingDetected(tables[i & 0xFF])) { + count++; + } + } + return count; + } +} diff --git a/guava/src/com/google/common/collect/ImmutableSet.java b/guava/src/com/google/common/collect/ImmutableSet.java index 6e870c3e6..cd55c8b86 100644 --- a/guava/src/com/google/common/collect/ImmutableSet.java +++ b/guava/src/com/google/common/collect/ImmutableSet.java @@ -637,10 +637,12 @@ public abstract class ImmutableSet<E> extends ImmutableCollection<E> implements static final double HASH_FLOODING_FPP = 0.001; // NB: yes, this is surprisingly high, but that's what the experiments said was necessary - static final int MAX_RUN_MULTIPLIER = 12; + // The higher it is, the worse constant factors we are willing to accept. + static final int MAX_RUN_MULTIPLIER = 13; /** - * Checks the whole hash table for poor hash distribution. Takes O(n). + * Checks the whole hash table for poor hash distribution. Takes O(n) in the worst case, O(n / log + * n) on average. * * <p>The online hash flooding detecting in RegularSetBuilderImpl.add can detect e.g. many exactly * matching hash codes, which would cause construction to take O(n^2), but can't detect e.g. hash @@ -652,11 +654,20 @@ public abstract class ImmutableSet<E> extends ImmutableCollection<E> implements * <p>Note that for a RegularImmutableSet with elements with truly random hash codes, contains * operations take expected O(1) time but with high probability take O(log n) for at least some * element. (https://en.wikipedia.org/wiki/Linear_probing#Analysis) + * + * <p>This method may return {@code true} up to {@link #HASH_FLOODING_FPP} of the time even on + * truly random input. + * + * <p>If this method returns false, there are definitely no runs of length at least {@code + * maxRunBeforeFallback(hashTable.length)} nonnull elements. If there are no runs of length at + * least {@code maxRunBeforeFallback(hashTable.length) / 2} nonnull elements, this method + * definitely returns false. In between those constraints, the result of this method is undefined, + * subject to the above {@link #HASH_FLOODING_FPP} constraint. */ static boolean hashFloodingDetected(Object[] hashTable) { int maxRunBeforeFallback = maxRunBeforeFallback(hashTable.length); - // Test for a run wrapping around the end of the table, then check for runs in the middle. + // Test for a run wrapping around the end of the table of length at least maxRunBeforeFallback. int endOfStartRun; for (endOfStartRun = 0; endOfStartRun < hashTable.length; ) { if (hashTable[endOfStartRun] == null) { @@ -676,22 +687,28 @@ public abstract class ImmutableSet<E> extends ImmutableCollection<E> implements return true; } } - for (int i = endOfStartRun + 1; i < startOfEndRun; i++) { - for (int runLength = 0; i < startOfEndRun && hashTable[i] != null; i++) { - runLength++; - if (runLength > maxRunBeforeFallback) { - return true; + + // Now, break the remainder of the table into blocks of maxRunBeforeFallback/2 elements and + // check that each has at least one null. + int testBlockSize = maxRunBeforeFallback / 2; + blockLoop: + for (int i = endOfStartRun + 1; i + testBlockSize <= startOfEndRun; i += testBlockSize) { + for (int j = 0; j < testBlockSize; j++) { + if (hashTable[i + j] == null) { + continue blockLoop; } } + return true; } return false; } /** * If more than this many consecutive positions are filled in a table of the specified size, - * report probable hash flooding. + * report probable hash flooding. ({@link #hashFloodingDetected} may also report hash flooding if + * fewer consecutive positions are filled; see that method for details.) */ - static int maxRunBeforeFallback(int tableSize) { + private static int maxRunBeforeFallback(int tableSize) { return MAX_RUN_MULTIPLIER * IntMath.log2(tableSize, RoundingMode.UNNECESSARY); } |