summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortanjent@gmail.com <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>2011-02-28 06:03:12 +0000
committertanjent@gmail.com <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>2011-02-28 06:03:12 +0000
commitbabb553380f2d223ebc4730830233c5d224963e8 (patch)
treee2cca8323914c05fc6c888841d581a0c7db0c384
parent9d17d0b842230279e2a89b2e65a7dde05494d529 (diff)
downloadsrc-babb553380f2d223ebc4730830233c5d224963e8.tar.gz
Tweak Murmur3, new more rigorous sanity test, combinatorial block test
git-svn-id: http://smhasher.googlecode.com/svn/trunk@76 77a7d1d3-4c08-bdc2-d393-d5859734b01a
-rw-r--r--Bitvec.cpp14
-rw-r--r--Bitvec.h1
-rw-r--r--Hashes.cpp32
-rw-r--r--Hashes.h4
-rw-r--r--KeysetTest.cpp178
-rw-r--r--KeysetTest.h56
-rw-r--r--MurmurHash3.cpp200
-rw-r--r--MurmurHashAligned.cpp2
-rw-r--r--MurmurHashAligned2.cpp2
-rw-r--r--MurmurHashNeutral2.cpp2
-rw-r--r--MurmurHashTest.cpp17
-rw-r--r--SpeedTest.cpp57
-rw-r--r--SpeedTest.h55
-rw-r--r--Types.h1
-rw-r--r--main.cpp238
15 files changed, 480 insertions, 379 deletions
diff --git a/Bitvec.cpp b/Bitvec.cpp
index 463c1e4..6939980 100644
--- a/Bitvec.cpp
+++ b/Bitvec.cpp
@@ -71,6 +71,20 @@ void printhex32 ( void * blob, int len )
printf("}");
}
+void printbytes ( void * blob, int len )
+{
+ uint8_t * d = (uint8_t*)blob;
+
+ printf("{ ");
+
+ for(int i = 0; i < len; i++)
+ {
+ printf("0x%02x, ",d[i]);
+ }
+
+ printf(" };");
+}
+
//-----------------------------------------------------------------------------
uint32_t getbit ( void * block, int len, uint32_t bit )
diff --git a/Bitvec.h b/Bitvec.h
index fce97d5..1a475d1 100644
--- a/Bitvec.h
+++ b/Bitvec.h
@@ -8,6 +8,7 @@
void printbits ( void * blob, int len );
void printhex32 ( void * blob, int len );
+void printbytes ( void * blob, int len );
uint32_t getbit ( void * blob, int len, uint32_t bit );
uint32_t getbit_wrap ( void * blob, int len, uint32_t bit );
diff --git a/Hashes.cpp b/Hashes.cpp
index f6787c6..cebd37f 100644
--- a/Hashes.cpp
+++ b/Hashes.cpp
@@ -2,6 +2,13 @@
#include "Random.h"
+
+#include <stdlib.h>
+//#include <stdint.h>
+#include <assert.h>
+#include <emmintrin.h>
+#include <xmmintrin.h>
+
//----------------------------------------------------------------------------
// fake / bad hashes
@@ -54,6 +61,20 @@ void sumhash ( const void * key, int len, uint32_t seed, void * out )
*(uint32_t*)out = h;
}
+void sumhash32 ( const void * key, int len, uint32_t seed, void * out )
+{
+ uint32_t h = seed;
+
+ const uint32_t * data = (const uint32_t*)key;
+
+ for(int i = 0; i < len/4; i++)
+ {
+ h += data[i];
+ }
+
+ *(uint32_t*)out = h;
+}
+
void DoNothingHash ( const void *, int, uint32_t, void * )
{
return;
@@ -62,20 +83,23 @@ void DoNothingHash ( const void *, int, uint32_t, void * )
//-----------------------------------------------------------------------------
// One-byte-at-a-time hash based on Murmur's mix
-uint32_t MurmurOAAT ( const void * key, int len, uint32_t h )
+void MurmurOAAT ( const void * key, int len, uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
- h ^= len;
+ uint32_t h = seed ^ len;
for(int i = 0; i < len; i++)
{
h ^= data[i];
h *= 0x5bd1e995;
- h ^= h >> 16;
+ h ^= h >> 15;
}
- return h;
+ h *= 0x5bd1e995;
+ h ^= h >> 15;
+
+ *(uint32_t*)out = h;
}
//----------------------------------------------------------------------------
diff --git a/Hashes.h b/Hashes.h
index 1aad04c..a580113 100644
--- a/Hashes.h
+++ b/Hashes.h
@@ -9,6 +9,9 @@
//----------
// These are _not_ hash functions (even though people tend to use crc32 as one...)
+void sumhash ( const void * key, int len, uint32_t seed, void * out );
+void sumhash32 ( const void * key, int len, uint32_t seed, void * out );
+
void DoNothingHash ( const void * key, int len, uint32_t seed, void * out );
void crc32 ( const void * key, int len, uint32_t seed, void * out );
@@ -28,6 +31,7 @@ void sha1_32a ( const void * key, int len, uint32_t seed, void * ou
void FNV ( const void * key, int len, uint32_t seed, void * out );
void SuperFastHash ( const void * key, int len, uint32_t seed, void * out );
void lookup3_test ( const void * key, int len, uint32_t seed, void * out );
+void MurmurOAAT ( const void * key, int len, uint32_t seed, void * out );
//----------
// MurmurHash2
diff --git a/KeysetTest.cpp b/KeysetTest.cpp
index 70b63de..6436826 100644
--- a/KeysetTest.cpp
+++ b/KeysetTest.cpp
@@ -29,55 +29,81 @@ void QuickBrownFox ( pfHash hash, const int hashbits )
}
//----------------------------------------------------------------------------
-// Alignment of the keys should not affect the hash value - if it does,
-// something is broken.
+// Basic sanity checks -
-void AlignmentTest ( pfHash hash, const int hashbits )
-{
- const int hashbytes = hashbits / 8;
+// A hash function should not be reading outside the bounds of the key.
- printf("Testing alignment handling..........");
+// Flipping a bit of a key should, with overwhelmingly high probability,
+// result in a different hash.
- char testblob[512];
- rand_p(testblob,512);
+// Hashing the same key twice should always produce the same result.
+
+// The memory alignment of the key should not affect the hash result.
- char * bufs[16];
- char * strings[16];
+bool SanityTest ( pfHash hash, const int hashbits )
+{ printf("Testing bit twiddling");
+
+ bool result = true;
- for(int i = 0; i < 16; i++)
+ const int hashbytes = hashbits/8;
+ const int reps = 10;
+ const int keymax = 128;
+ const int pad = 16;
+ const int buflen = keymax + pad*3;
+
+ uint8_t * buffer1 = new uint8_t[buflen];
+ uint8_t * buffer2 = new uint8_t[buflen];
+
+ uint8_t * hash1 = new uint8_t[hashbytes];
+ uint8_t * hash2 = new uint8_t[hashbytes];
+
+ //----------
+
+ for(int irep = 0; irep < reps; irep++)
{
- bufs[i] = new char[1024];
- uint32_t b = uint32_t(bufs[i]);
+ if(irep % (reps/10) == 0) printf(".");
- b = (b+15)&(~15);
+ for(int len = 4; len <= keymax; len++)
+ {
+ for(int offset = pad; offset < pad*2; offset++)
+ {
+ uint8_t * key1 = &buffer1[pad];
+ uint8_t * key2 = &buffer2[pad+offset];
- strings[i] = (char*)(b + i);
-
- memcpy(strings[i],testblob,512);
- }
+ rand_p(buffer1,buflen);
+ rand_p(buffer2,buflen);
- bool result = true;
+ memcpy(key2,key1,len);
- uint32_t hash1[64];
- uint32_t hash2[64];
+ hash(key1,len,0,hash1);
- for(int k = 1; k <= 512; k++)
- for(int j = 0; j < 15; j++)
- for(int i = j+1; i < 16; i++)
- {
- const char * s1 = strings[i];
- const char * s2 = strings[j];
+ for(int bit = 0; bit < (len * 8); bit++)
+ {
+ // Flip a bit, hash the key -> we should get a different result.
- hash(s1,k,0,hash1);
- hash(s2,k,0,hash2);
+ flipbit(key2,len,bit);
+ hash(key2,len,0,hash2);
- if(memcmp(hash1,hash2,hashbytes) != 0)
- {
- result = false;
+ if(memcmp(hash1,hash2,hashbytes) == 0)
+ {
+ result = false;
+ }
+
+ // Flip it back, hash again -> we should get the original result.
+
+ flipbit(key2,len,bit);
+ hash(key2,len,0,hash2);
+
+ if(memcmp(hash1,hash2,hashbytes) != 0)
+ {
+ result = false;
+ }
+ }
+ }
}
}
- if(!result)
+ if(result == false)
{
printf("*********FAIL*********\n");
}
@@ -86,7 +112,10 @@ void AlignmentTest ( pfHash hash, const int hashbits )
printf("PASS\n");
}
- for(int i = 0; i < 16; i++) delete [] bufs[i];
+ delete [] hash1;
+ delete [] hash2;
+
+ return result;
}
//----------------------------------------------------------------------------
@@ -132,85 +161,4 @@ void AppendedZeroesTest ( pfHash hash, const int hashbits )
printf("PASS\n");
}
-//----------------------------------------------------------------------------
-// Basic sanity checks -
-
-// A hash function should not be reading outside the bounds of the key.
-
-// Flipping a bit of a key should, with overwhelmingly high probability,
-// result in a different hash.
-
-// Hashing the same key twice should always produce the same result.
-
-bool SanityTest ( pfHash hash, const int hashbits )
-{
- bool result = true;
-
- const int hashbytes = hashbits/8;
- const int reps = 100;
-
- printf("Testing bit twiddling");
-
- uint8_t buffer[256];
- uint8_t * key = &buffer[64];
-
- uint8_t * h1 = new uint8_t[hashbytes];
- uint8_t * h2 = new uint8_t[hashbytes];
-
- for(int irep = 0; irep < reps; irep++)
- {
- if(irep % (reps/10) == 0) printf(".");
-
- for(int len = 1; len <= 128; len++)
- {
- // Generate a random key in the middle of the buffer, hash it,
- // and then fill the space around the key with garbage. If a
- // broken hash function reads past the ends of the key, it should
- // fail the "did we get the same hash?" test below.
-
- rand_p(key,len);
- hash(key,len,0,h1);
-
- rand_p(buffer,64);
- rand_p(key+len,64);
-
- // Flip a bit, hash the key -> we should get a different result.
- // Flip it back, hash again -> we should get the same result.
-
- for(int bit = 0; bit < (len * 8); bit++)
- {
- flipbit(key,len,bit);
- hash(key,len,0,h2);
-
- if(memcmp(h1,h2,hashbytes) == 0)
- {
- result = false;
- }
-
- flipbit(key,len,bit);
- hash(key,len,0,h2);
-
- if(memcmp(h1,h2,hashbytes) != 0)
- {
- result = false;
- }
- }
- }
- }
-
- if(result == false)
- {
- printf("*********FAIL*********\n");
- }
- else
- {
- printf("PASS\n");
- }
-
- delete [] h1;
- delete [] h2;
-
- return result;
-}
-
//-----------------------------------------------------------------------------
diff --git a/KeysetTest.h b/KeysetTest.h
index 53b61ec..7ef398c 100644
--- a/KeysetTest.h
+++ b/KeysetTest.h
@@ -20,6 +20,62 @@ void QuickBrownFox ( pfHash hash, const int hashbits );
void AlignmentTest ( pfHash hash, const int hashbits );
void AppendedZeroesTest ( pfHash hash, const int hashbits );
+//-----------------------------------------------------------------------------
+// Keyset 'Combination' - all possible combinations of input blocks
+
+template< typename hashtype >
+void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen,
+ uint32_t * blocks, int blockcount,
+ pfHash hash, std::vector<hashtype> & hashes )
+{
+ if(len == maxlen) return;
+
+ for(int i = 0; i < blockcount; i++)
+ {
+ key[len] = blocks[i];
+
+ //if(len == maxlen-1)
+ {
+ hashtype h;
+ hash(key,(len+1) * sizeof(uint32_t),0,&h);
+ hashes.push_back(h);
+ }
+
+ //else
+ {
+ CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes);
+ }
+ }
+}
+
+template< typename hashtype >
+bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
+{
+ printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);
+
+ //----------
+
+ std::vector<hashtype> hashes;
+
+ uint32_t * key = new uint32_t[maxlen];
+
+ CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes);
+
+ delete [] key;
+
+ printf("%d keys\n",(int)hashes.size());
+
+ //----------
+
+ bool result = true;
+
+ result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
+
+ printf("\n");
+
+ return result;
+}
+
//----------------------------------------------------------------------------
// Keyset 'Permutation' - given a set of 32-bit blocks, generate keys
// consisting of all possible permutations of those blocks
diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index c66bd87..75e0209 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -1,7 +1,12 @@
#include "MurmurHash3.h"
-
#include <stdlib.h> // for _rotl
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+
//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here
@@ -12,43 +17,32 @@ inline uint32_t getblock ( const uint32_t * p, int i )
}
//----------
-// Finalization mix - force all bits of a hash block to avalanche
-
-// avalanches all bits to within 0.25% bias
-
-inline uint32_t fmix32 ( uint32_t h )
-{
- h ^= h >> 16;
- h *= 0x85ebca6b;
- h ^= h >> 13;
- h *= 0xc2b2ae35;
- h ^= h >> 16;
-
- return h;
-}
-
-//-----------------------------------------------------------------------------
inline void bmix32 ( uint32_t & h1, uint32_t & k1, uint32_t & c1, uint32_t & c2 )
{
+ c1 = c1*5+0x7b7d159c;
+ c2 = c2*5+0x6bce6396;
+
k1 *= c1;
- k1 = _rotl(k1,11);
+ k1 = _rotl(k1,11);
k1 *= c2;
- h1 ^= k1;
-
- h1 = h1*3+0x52dce729;
- c1 = c1*5+0x7b7d159c;
- c2 = c2*5+0x6bce6396;
+ h1 = _rotl(h1,13);
+ h1 = h1*5+0x52dce729;
+ h1 ^= k1;
}
//----------
+//void MurmurHash3_x86_32 ( const void * key, int len, const void * seed, void * out )
void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
const int nblocks = len / 4;
+ //uint32_t * s = (uint32_t*)seed;
+
+ //uint32_t h1 = 0x971e137b ^ s[0];
uint32_t h1 = 0x971e137b ^ seed;
uint32_t c1 = 0x95543787;
@@ -86,101 +80,18 @@ void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out )
h1 ^= len;
- h1 = fmix32(h1);
+ h1 *= 0x85ebca6b;
+ h1 ^= h1 >> 13;
+ h1 *= 0xc2b2ae35;
+ h1 ^= h1 >> 16;
+
+ //h1 ^= s[0];
+ h1 ^= seed;
*(uint32_t*)out = h1;
}
//-----------------------------------------------------------------------------
-
-inline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & k1, uint32_t & k2, uint32_t & c1, uint32_t & c2 )
-{
- k1 *= c1;
- k1 = _rotl(k1,11);
- k1 *= c2;
- h1 ^= k1;
- h1 += h2;
-
- h2 = _rotl(h2,17);
-
- k2 *= c2;
- k2 = _rotl(k2,11);
- k2 *= c1;
- h2 ^= k2;
- h2 += h1;
-
- h1 = h1*3+0x52dce729;
- h2 = h2*3+0x38495ab5;
-
- c1 = c1*5+0x7b7d159c;
- c2 = c2*5+0x6bce6396;
-}
-
-//----------
-
-void MurmurHash3_x86_64 ( const void * key, const int len, const uint32_t seed, void * out )
-{
- const uint8_t * data = (const uint8_t*)key;
- const int nblocks = len / 8;
-
- uint32_t h1 = 0x8de1c3ac ^ seed;
- uint32_t h2 = 0xbab98226 ^ seed;
-
- uint32_t c1 = 0x95543787;
- uint32_t c2 = 0x2ad7eb25;
-
- //----------
- // body
-
- const uint32_t * blocks = (const uint32_t *)(data + nblocks*8);
-
- for(int i = -nblocks; i; i++)
- {
- uint32_t k1 = getblock(blocks,i*2+0);
- uint32_t k2 = getblock(blocks,i*2+1);
-
- bmix32(h1,h2,k1,k2,c1,c2);
- }
-
- //----------
- // tail
-
- const uint8_t * tail = (const uint8_t*)(data + nblocks*8);
-
- uint32_t k1 = 0;
- uint32_t k2 = 0;
-
- switch(len & 7)
- {
- case 7: k2 ^= tail[6] << 16;
- case 6: k2 ^= tail[5] << 8;
- case 5: k2 ^= tail[4] << 0;
- case 4: k1 ^= tail[3] << 24;
- case 3: k1 ^= tail[2] << 16;
- case 2: k1 ^= tail[1] << 8;
- case 1: k1 ^= tail[0] << 0;
- bmix32(h1,h2,k1,k2,c1,c2);
- };
-
- //----------
- // finalization
-
- h2 ^= len;
-
- h1 += h2;
- h2 += h1;
-
- h1 = fmix32(h1);
- h2 = fmix32(h2);
-
- h1 += h2;
- h2 += h1;
-
- ((uint32_t*)out)[0] = h1;
- ((uint32_t*)out)[1] = h2;
-}
-
-//-----------------------------------------------------------------------------
// This mix is large enough that VC++ refuses to inline it unless we use
// __forceinline. It's also not all that fast due to register spillage.
@@ -230,12 +141,34 @@ __forceinline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & h3, uint32_
}
//----------
+// Finalization mix - force all bits of a hash block to avalanche
-void MurmurHash3_x86_128 ( const void * key, const int len, const uint32_t seed, void * out )
+// avalanches all bits to within 0.25% bias
+
+inline uint32_t fmix32 ( uint32_t h )
+{
+ h ^= h >> 16;
+ h *= 0x85ebca6b;
+ h ^= h >> 13;
+ h *= 0xc2b2ae35;
+ h ^= h >> 16;
+
+ return h;
+}
+
+//void MurmurHash3_x86_128 ( const void * key, const int len, const void * seed, void * out )
+void MurmurHash3_x86_128 ( const void * key, const int len, uint32_t seed, void * out )
{
const uint8_t * data = (const uint8_t*)key;
const int nblocks = len / 16;
+ //uint32_t * s = (uint32_t*)(seed);
+
+ //uint32_t h1 = 0x8de1c3ac ^ s[0];
+ //uint32_t h2 = 0xbab98226 ^ s[1];
+ //uint32_t h3 = 0xfcba5b2d ^ s[2];
+ //uint32_t h4 = 0x32452e3e ^ s[3];
+
uint32_t h1 = 0x8de1c3ac ^ seed;
uint32_t h2 = 0xbab98226 ^ seed;
uint32_t h3 = 0xfcba5b2d ^ seed;
@@ -274,14 +207,17 @@ void MurmurHash3_x86_128 ( const void * key, const int len, const uint32_t seed,
case 15: k4 ^= tail[14] << 16;
case 14: k4 ^= tail[13] << 8;
case 13: k4 ^= tail[12] << 0;
+
case 12: k3 ^= tail[11] << 24;
case 11: k3 ^= tail[10] << 16;
case 10: k3 ^= tail[ 9] << 8;
case 9: k3 ^= tail[ 8] << 0;
+
case 8: k2 ^= tail[ 7] << 24;
case 7: k2 ^= tail[ 6] << 16;
case 6: k2 ^= tail[ 5] << 8;
case 5: k2 ^= tail[ 4] << 0;
+
case 4: k1 ^= tail[ 3] << 24;
case 3: k1 ^= tail[ 2] << 16;
case 2: k1 ^= tail[ 1] << 8;
@@ -328,16 +264,18 @@ inline void bmix64 ( uint64_t & h1, uint64_t & h2, uint64_t & k1, uint64_t & k2,
k1 *= c1;
k1 = _rotl64(k1,23);
k1 *= c2;
- h1 ^= k1;
- h1 += h2;
-
- h2 = _rotl64(h2,41);
k2 *= c2;
k2 = _rotl64(k2,23);
k2 *= c1;
- h2 ^= k2;
+
+ h1 = _rotl64(h1,17);
+ h1 += h2;
+ h1 ^= k1;
+
+ h2 = _rotl64(h2,41);
h2 += h1;
+ h2 ^= k2;
h1 = h1*3+0x52dce729;
h2 = h2*3+0x38495ab5;
@@ -434,27 +372,3 @@ void MurmurHash3_x64_128 ( const void * key, const int len, const uint32_t seed,
}
//-----------------------------------------------------------------------------
-// If we need a smaller hash value, it's faster to just use a portion of the
-// 128-bit hash
-
-void MurmurHash3_x64_32 ( const void * key, int len, uint32_t seed, void * out )
-{
- uint32_t temp[4];
-
- MurmurHash3_x64_128(key,len,seed,temp);
-
- *(uint32_t*)out = temp[0];
-}
-
-//----------
-
-void MurmurHash3_x64_64 ( const void * key, int len, uint32_t seed, void * out )
-{
- uint64_t temp[2];
-
- MurmurHash3_x64_128(key,len,seed,temp);
-
- *(uint64_t*)out = temp[0];
-}
-
-//-----------------------------------------------------------------------------
diff --git a/MurmurHashAligned.cpp b/MurmurHashAligned.cpp
index 716dda6..63dfe61 100644
--- a/MurmurHashAligned.cpp
+++ b/MurmurHashAligned.cpp
@@ -1,2 +1,2 @@
-#include "stdafx.h"
+//#include "stdafx.h"
diff --git a/MurmurHashAligned2.cpp b/MurmurHashAligned2.cpp
index 23dced4..83e9723 100644
--- a/MurmurHashAligned2.cpp
+++ b/MurmurHashAligned2.cpp
@@ -1,4 +1,4 @@
-#include "stdafx.h"
+//#include "stdafx.h"
#pragma warning(disable:4311)
diff --git a/MurmurHashNeutral2.cpp b/MurmurHashNeutral2.cpp
index 716dda6..63dfe61 100644
--- a/MurmurHashNeutral2.cpp
+++ b/MurmurHashNeutral2.cpp
@@ -1,2 +1,2 @@
-#include "stdafx.h"
+//#include "stdafx.h"
diff --git a/MurmurHashTest.cpp b/MurmurHashTest.cpp
index 6b18f53..3bc6a96 100644
--- a/MurmurHashTest.cpp
+++ b/MurmurHashTest.cpp
@@ -7,20 +7,3 @@ uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed );
uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed );
uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed );
uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed );
-
-
-void MurmurHash1_test ( const void * key, int len, uint32_t seed, void * out )
-{
- *(uint32_t*)out = MurmurHash1(key,len,seed);
-}
-
-void MurmurHash2_test ( const void * key, int len, uint32_t seed, void * out )
-{
- *(uint32_t*)out = MurmurHash2(key,len,seed);
-}
-
-void MurmurHash2A_test ( const void * key, int len, uint32_t seed, void * out )
-{
- *(uint32_t*)out = MurmurHash2A(key,len,seed);
-}
-
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index dbfadcb..d95acd4 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -48,3 +48,60 @@ void BulkSpeedTest ( pfHash hash )
delete [] block;
}
+
+//-----------------------------------------------------------------------------
+
+void TinySpeedTest ( pfHash hash, int hashsize, int keysize, bool verbose, double & outCycles )
+{
+ const int trials = 100000;
+
+ if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
+
+ uint8_t * h = new uint8_t[hashsize];
+ uint8_t * k = new uint8_t[keysize];
+
+ double bestcycles = 1e9;
+
+ for(int itrial = 0; itrial < trials; itrial++)
+ {
+ __int64 begin,end;
+
+ rand_p(k,keysize);
+
+ begin = __rdtsc();
+
+ hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h);
+ hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h);
+ hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h);
+ hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h);
+
+ hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h);
+ hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h);
+ hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h);
+ hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h);
+
+ hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h);
+ hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h);
+ hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h);
+ hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h);
+
+ hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h);
+ hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h);
+ hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h);
+ hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h);
+
+ end = __rdtsc();
+
+ //blackhole(*(uint32_t*)(&h));
+
+ double cycles = double(end-begin) / 64;
+ if(cycles < bestcycles) bestcycles = cycles;
+ }
+
+ double bestbpc = double(keysize) / bestcycles;
+ if(verbose) printf("%8.2f cycles/hash, %8.4f bytes/cycle\n",bestcycles,bestbpc);
+
+ outCycles = bestcycles;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/SpeedTest.h b/SpeedTest.h
index 5a5ed54..a8f0086 100644
--- a/SpeedTest.h
+++ b/SpeedTest.h
@@ -3,59 +3,6 @@
#include "Types.h"
void BulkSpeedTest ( pfHash hash );
-
-//----------------------------------------------------------------------------
-
-template < typename hashtype, int keysize >
-void TinySpeedTest ( pfHash hash )
-{
- const int trials = 100000;
-
- printf("Small key speed test - %4d-byte keys - ",keysize);
-
- uint8_t k[keysize];
- hashtype h;
-
- double bestcycles = 1e9;
-
- for(int itrial = 0; itrial < trials; itrial++)
- {
- __int64 begin,end;
-
- rand_p(k,keysize);
-
- begin = __rdtsc();
-
- hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h);
- hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h);
- hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h);
- hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h);
-
- hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h);
- hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h);
- hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h);
- hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h);
-
- hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h);
- hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h);
- hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h);
- hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h);
-
- hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h);
- hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h);
- hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h);
- hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h);
-
- end = __rdtsc();
-
- blackhole(*(uint32_t*)(&h));
-
- double cycles = double(end-begin) / 64;
- if(cycles < bestcycles) bestcycles = cycles;
- }
-
- double bestbpc = double(keysize) / bestcycles;
- printf("%8.2f cycles/hash, %8.4f bytes/cycle\n",bestcycles,bestbpc);
-}
+void TinySpeedTest ( pfHash hash, int hashsize, int keysize, bool verbose, double & outCycles );
//-----------------------------------------------------------------------------
diff --git a/Types.h b/Types.h
index eb2a309..d1eae9f 100644
--- a/Types.h
+++ b/Types.h
@@ -233,5 +233,6 @@ private:
};
typedef Blob<128> uint128_t;
+typedef Blob<256> uint256_t;
//-----------------------------------------------------------------------------
diff --git a/main.cpp b/main.cpp
index f5fcf04..36636ea 100644
--- a/main.cpp
+++ b/main.cpp
@@ -81,6 +81,7 @@ HashInfo g_hashes[] =
{ FNV, 32, "FNV", "Fowler-Noll-Vo hash, 32-bit" },
{ lookup3_test, 32, "lookup3", "Bob Jenkins' lookup3" },
{ SuperFastHash, 32, "superfast", "Paul Hsieh's SuperFastHash" },
+ { MurmurOAAT, 32, "MurmurOAAT", "Murmur one-at-a-time" },
// MurmurHash2
@@ -92,11 +93,7 @@ HashInfo g_hashes[] =
// MurmurHash3
{ MurmurHash3_x86_32, 32, "Murmur3A", "MurmurHash3 for x86, 32-bit" },
- { MurmurHash3_x86_64, 64, "Murmur3B", "MurmurHash3 for x86, 64-bit" },
{ MurmurHash3_x86_128, 128, "Murmur3C", "MurmurHash3 for x86, 128-bit" },
-
- { MurmurHash3_x64_32, 32, "Murmur3D", "MurmurHash3 for x64, 32-bit" },
- { MurmurHash3_x64_64, 64, "Murmur3E", "MurmurHash3 for x64, 64-bit" },
{ MurmurHash3_x64_128, 128, "Murmur3F", "MurmurHash3 for x64, 128-bit" },
};
@@ -130,7 +127,6 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
QuickBrownFox(hash,hashbits);
SanityTest(hash,hashbits);
- AlignmentTest(hash,hashbits);
AppendedZeroesTest(hash,hashbits);
printf("\n");
}
@@ -145,12 +141,21 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
BulkSpeedTest(hash);
printf("\n");
- TinySpeedTest<hashtype,4>(hash);
- TinySpeedTest<hashtype,8>(hash);
- TinySpeedTest<hashtype,16>(hash);
- TinySpeedTest<hashtype,32>(hash);
- TinySpeedTest<hashtype,64>(hash);
- TinySpeedTest<hashtype,128>(hash);
+ for(int i = 1; i < 32; i++)
+ {
+ double cycles;
+
+ TinySpeedTest(hash,sizeof(hashtype),i,true,cycles);
+ }
+
+ for(int i = 32; i <= 2048; i += 32)
+ {
+ double cycles;
+
+
+ TinySpeedTest(hash,sizeof(hashtype),i,true,cycles);
+ }
+
printf("\n");
}
@@ -181,10 +186,44 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
{
printf("[[[ Avalanche Tests ]]]\n\n");
- const int hashbits = sizeof(hashtype) * 8;
+ //const int hashbits = sizeof(hashtype) * 8;
bool result = true;
- result &= AvalancheTest< Blob<hashbits * 2>, hashtype > (hash,2000000);
+ result &= AvalancheTest< Blob< 32>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob< 40>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob< 48>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob< 56>, hashtype > (hash,300000);
+
+ result &= AvalancheTest< Blob< 64>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob< 72>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob< 80>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob< 88>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob< 96>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<104>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<112>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<120>, hashtype > (hash,300000);
+
+ result &= AvalancheTest< Blob<128>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<136>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<144>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<152>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<160>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<168>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<176>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<184>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<192>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<200>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<208>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<216>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<224>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<232>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<240>, hashtype > (hash,300000);
+ result &= AvalancheTest< Blob<248>, hashtype > (hash,300000);
+
+ result &= AvalancheTest< Blob<256>, hashtype > (hash,300000);
+
+ //result &= AvalancheTest< Blob<hashbits * 2>, hashtype > (hash,200000);
+ //result &= AvalancheTest< Blob<768>, hashtype > (hash,2000000);
// The bit independence test is slow and not particularly useful...
//result &= BicTest < Blob<hashbits * 2>, hashtype > ( hash, 1000000 );
@@ -241,34 +280,139 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
if(g_testPermutation || g_testAll)
{
- printf("[[[ Keyset 'Permutation' Tests ]]]\n\n");
+ {
+ // This one breaks lookup3, surprisingly
- bool result = true;
- bool drawDiagram = false;
+ printf("[[[ Keyset 'Combination Lowbits' Tests ]]]\n\n");
+
+ bool result = true;
+ bool drawDiagram = false;
- // This very sparse set of blocks is particularly hard for SuperFastHash
+ uint32_t blocks[] =
+ {
+ 0x00000000,
+
+ 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
+ };
+
+ result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+ if(!result) printf("*********FAIL*********\n");
+ printf("\n");
+ }
- uint32_t blocks[] =
{
- 0x00000000,
- 0x00000001,
- 0x00000002,
-
- 0x00000400,
- 0x00008000,
-
- 0x00080000,
- 0x00200000,
-
- 0x20000000,
- 0x40000000,
- 0x80000000,
- };
-
- result &= PermutationKeyTest<hashtype>(hash,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+ printf("[[[ Keyset 'Combination Highbits' Tests ]]]\n\n");
- if(!result) printf("*********FAIL*********\n");
- printf("\n");
+ bool result = true;
+ bool drawDiagram = false;
+
+ uint32_t blocks[] =
+ {
+ 0x00000000,
+
+ 0x20000000, 0x40000000, 0x60000000, 0x80000000, 0xA0000000, 0xC0000000, 0xE0000000
+ };
+
+ result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+ if(!result) printf("*********FAIL*********\n");
+ printf("\n");
+ }
+
+ {
+ printf("[[[ Keyset 'Combination 0x8000000' Tests ]]]\n\n");
+
+ bool result = true;
+ bool drawDiagram = false;
+
+ uint32_t blocks[] =
+ {
+ 0x00000000,
+
+ 0x80000000,
+ };
+
+ result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+ if(!result) printf("*********FAIL*********\n");
+ printf("\n");
+ }
+
+ {
+ printf("[[[ Keyset 'Combination 0x0000001' Tests ]]]\n\n");
+
+ bool result = true;
+ bool drawDiagram = false;
+
+ uint32_t blocks[] =
+ {
+ 0x00000000,
+
+ 0x00000001,
+ };
+
+ result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+ if(!result) printf("*********FAIL*********\n");
+ printf("\n");
+ }
+
+ {
+ printf("[[[ Keyset 'Combination Hi-Lo' Tests ]]]\n\n");
+
+ bool result = true;
+ bool drawDiagram = false;
+
+ uint32_t blocks[] =
+ {
+ 0x00000000,
+
+ 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
+
+ 0x80000000, 0x40000000, 0xC0000000, 0x20000000, 0xA0000000, 0x60000000, 0xE0000000
+ };
+
+ result &= CombinationKeyTest<hashtype>(hash,6,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+ if(!result) printf("*********FAIL*********\n");
+ printf("\n");
+ }
+
+ //----------
+
+ /*
+ {
+ printf("[[[ Keyset 'Permutation' Tests ]]]\n\n");
+
+ bool result = true;
+ bool drawDiagram = false;
+
+ // This very sparse set of blocks is particularly hard for SuperFastHash
+
+ uint32_t blocks[] =
+ {
+ 0x00000000,
+ 0x00000001,
+ 0x00000002,
+
+ 0x00000400,
+ 0x00008000,
+
+ 0x00080000,
+ 0x00200000,
+
+ 0x20000000,
+ 0x40000000,
+ 0x80000000,
+ };
+
+ result &= PermutationKeyTest<hashtype>(hash,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+ if(!result) printf("*********FAIL*********\n");
+ printf("\n");
+ }
+ */
}
//-----------------------------------------------------------------------------
@@ -370,6 +514,10 @@ void testHash ( const char * name )
{
test<uint128_t>( pInfo->hash, pInfo->desc );
}
+ else if(pInfo->hashbits == 256)
+ {
+ test<uint256_t>( pInfo->hash, pInfo->desc );
+ }
else
{
printf("Invalid hash bit width %d for hash '%s'",pInfo->hashbits,pInfo->name);
@@ -393,9 +541,11 @@ int main ( int argc, char ** argv )
//g_testSanity = true;
//g_testSpeed = true;
//g_testAvalanche = true;
+ //g_testCyclic = true;
//g_testDiff = true;
//g_testSparse = true;
//g_testPermutation = true;
+ //g_testZeroes = true;
//testHash("rand32");
//testHash("rand64");
@@ -408,22 +558,24 @@ int main ( int argc, char ** argv )
//printf("Called the hash function %I64d times, %I64d bytes hashed\n",g_hashcount,g_bytecount);
//testHash("crc32");
+ //testHash("rand128");
//testHash("fnv");
//testHash("superfast");
//testHash("lookup3");
+ //testHash("MurmurOAAT");
//testHash("murmur2");
//testHash("murmur2B");
//testHash("murmur2C");
- testHash("murmur3a");
- testHash("murmur3b");
- testHash("murmur3c");
+ //testHash("murmur3a");
+ //testHash("murmur3b");
+ //testHash("murmur3c");
- testHash("murmur3d");
- testHash("murmur3e");
- testHash("murmur3f");
+ //testHash("murmur3d");
+ //testHash("murmur3e");
+ //testHash("murmur3f");
//----------
@@ -432,4 +584,4 @@ int main ( int argc, char ** argv )
printf("time %d\n",b-a);
return 0;
-}
+} \ No newline at end of file