Tweak Murmur3, new more rigorous sanity test, combinatorial block test

git-svn-id: http://smhasher.googlecode.com/svn/trunk@76 77a7d1d3-4c08-bdc2-d393-d5859734b01a
author: tanjent@gmail.com <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a> 2011-02-28 06:03:12 +0000
committer: tanjent@gmail.com <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a> 2011-02-28 06:03:12 +0000
commit: babb553380f2d223ebc4730830233c5d224963e8 (patch)
tree: e2cca8323914c05fc6c888841d581a0c7db0c384
parent: 9d17d0b842230279e2a89b2e65a7dde05494d529 (diff)
download: src-babb553380f2d223ebc4730830233c5d224963e8.tar.gz
15 files changed, 480 insertions, 379 deletions
diff --git a/Bitvec.cpp b/Bitvec.cpp
index 463c1e4..6939980 100644
--- a/Bitvec.cpp
+++ b/Bitvec.cpp
@@ -71,6 +71,20 @@ void printhex32 ( void * blob, int len )
 	printf("}");
 }
 
+void printbytes ( void * blob, int len )
+{
+	uint8_t * d = (uint8_t*)blob;
+
+	printf("{ ");
+
+	for(int i = 0; i < len; i++)
+	{
+		printf("0x%02x, ",d[i]);
+	}
+
+	printf(" };");
+}
+
 //-----------------------------------------------------------------------------
 
 uint32_t getbit ( void * block, int len, uint32_t bit )
diff --git a/Bitvec.h b/Bitvec.h
index fce97d5..1a475d1 100644
--- a/Bitvec.h
+++ b/Bitvec.h
@@ -8,6 +8,7 @@
 
 void     printbits   ( void * blob, int len );
 void     printhex32  ( void * blob, int len );
+void     printbytes  ( void * blob, int len );
 
 uint32_t getbit      ( void * blob, int len, uint32_t bit );
 uint32_t getbit_wrap ( void * blob, int len, uint32_t bit );
diff --git a/Hashes.cpp b/Hashes.cpp
index f6787c6..cebd37f 100644
--- a/Hashes.cpp
+++ b/Hashes.cpp
@@ -2,6 +2,13 @@
 
 #include "Random.h"
 
+
+#include <stdlib.h>
+//#include <stdint.h>
+#include <assert.h>
+#include <emmintrin.h>
+#include <xmmintrin.h>
+
 //----------------------------------------------------------------------------
 // fake / bad hashes
 
@@ -54,6 +61,20 @@ void sumhash ( const void * key, int len, uint32_t seed, void * out )
 	*(uint32_t*)out = h;
 }
 
+void sumhash32 ( const void * key, int len, uint32_t seed, void * out )
+{
+	uint32_t h = seed;
+
+	const uint32_t * data = (const uint32_t*)key;
+
+	for(int i = 0; i < len/4; i++)
+	{
+		h += data[i];
+	}
+
+	*(uint32_t*)out = h;
+}
+
 void DoNothingHash ( const void *, int, uint32_t, void * )
 {
 	return;
@@ -62,20 +83,23 @@ void DoNothingHash ( const void *, int, uint32_t, void * )
 //-----------------------------------------------------------------------------
 // One-byte-at-a-time hash based on Murmur's mix
 
-uint32_t MurmurOAAT ( const void * key, int len, uint32_t h )
+void MurmurOAAT ( const void * key, int len, uint32_t seed, void * out )
 {
 	const uint8_t * data = (const uint8_t*)key;
 
-	h ^= len;
+	uint32_t h = seed ^ len;
 
 	for(int i = 0; i < len; i++)
 	{
 		h ^= data[i];
 		h *= 0x5bd1e995;
-		h ^= h >> 16;
+		h ^= h >> 15;
 	}
 
-	return h;
+	h *= 0x5bd1e995;
+	h ^= h >> 15;
+
+	*(uint32_t*)out = h;
 }
 
 //----------------------------------------------------------------------------
diff --git a/Hashes.h b/Hashes.h
index 1aad04c..a580113 100644
--- a/Hashes.h
+++ b/Hashes.h
@@ -9,6 +9,9 @@
 //----------
 // These are _not_ hash functions (even though people tend to use crc32 as one...)
 
+void sumhash               ( const void * key, int len, uint32_t seed, void * out );
+void sumhash32             ( const void * key, int len, uint32_t seed, void * out );
+
 void DoNothingHash         ( const void * key, int len, uint32_t seed, void * out );
 void crc32                 ( const void * key, int len, uint32_t seed, void * out );
 
@@ -28,6 +31,7 @@ void sha1_32a              ( const void * key, int len, uint32_t seed, void * ou
 void FNV                   ( const void * key, int len, uint32_t seed, void * out );
 void SuperFastHash         ( const void * key, int len, uint32_t seed, void * out );
 void lookup3_test          ( const void * key, int len, uint32_t seed, void * out );
+void MurmurOAAT            ( const void * key, int len, uint32_t seed, void * out );
 
 //----------
 // MurmurHash2
diff --git a/KeysetTest.cpp b/KeysetTest.cpp
index 70b63de..6436826 100644
--- a/KeysetTest.cpp
+++ b/KeysetTest.cpp
@@ -29,55 +29,81 @@ void QuickBrownFox ( pfHash hash, const int hashbits )
 }
 
 //----------------------------------------------------------------------------
-// Alignment of the keys should not affect the hash value - if it does,
-// something is broken.
+// Basic sanity checks -
 
-void AlignmentTest ( pfHash hash, const int hashbits )
-{
-	const int hashbytes = hashbits / 8;
+// A hash function should not be reading outside the bounds of the key.
 
-	printf("Testing alignment handling..........");
+// Flipping a bit of a key should, with overwhelmingly high probability,
+// result in a different hash.
 
-	char testblob[512];
-	rand_p(testblob,512);
+// Hashing the same key twice should always produce the same result.
+
+// The memory alignment of the key should not affect the hash result.
 
-	char * bufs[16];
-	char * strings[16];
+bool SanityTest ( pfHash hash, const int hashbits )
+{	printf("Testing bit twiddling");
+
+	bool result = true;
 
-	for(int i = 0; i < 16; i++)
+	const int hashbytes = hashbits/8;
+	const int reps = 10;
+	const int keymax = 128;
+	const int pad = 16;
+	const int buflen = keymax + pad*3;
+	
+	uint8_t * buffer1 = new uint8_t[buflen];
+	uint8_t * buffer2 = new uint8_t[buflen];
+
+	uint8_t * hash1 = new uint8_t[hashbytes];
+	uint8_t * hash2 = new uint8_t[hashbytes];
+
+	//----------
+	
+	for(int irep = 0; irep < reps; irep++)
 	{
-		bufs[i] = new char[1024];
-		uint32_t b = uint32_t(bufs[i]);
+		if(irep % (reps/10) == 0) printf(".");
 
-		b = (b+15)&(~15);
+		for(int len = 4; len <= keymax; len++)
+		{
+			for(int offset = pad; offset < pad*2; offset++)
+			{
+				uint8_t * key1 = &buffer1[pad];
+				uint8_t * key2 = &buffer2[pad+offset];
 
-		strings[i] = (char*)(b + i);
-		
-		memcpy(strings[i],testblob,512);
-	}
+				rand_p(buffer1,buflen);
+				rand_p(buffer2,buflen);
 
-	bool result = true;
+				memcpy(key2,key1,len);
 
-	uint32_t hash1[64];
-	uint32_t hash2[64];
+				hash(key1,len,0,hash1);
 
-	for(int k = 1; k <= 512; k++)
-	for(int j = 0; j < 15; j++)
-	for(int i = j+1; i < 16; i++)
-	{
-		const char * s1 = strings[i];
-		const char * s2 = strings[j];
+				for(int bit = 0; bit < (len * 8); bit++)
+				{
+					// Flip a bit, hash the key -> we should get a different result.
 
-		hash(s1,k,0,hash1);
-		hash(s2,k,0,hash2);
+					flipbit(key2,len,bit);
+					hash(key2,len,0,hash2);
 
-		if(memcmp(hash1,hash2,hashbytes) != 0)
-		{
-			result = false;
+					if(memcmp(hash1,hash2,hashbytes) == 0)
+					{
+						result = false;
+					}
+
+					// Flip it back, hash again -> we should get the original result.
+
+					flipbit(key2,len,bit);
+					hash(key2,len,0,hash2);
+
+					if(memcmp(hash1,hash2,hashbytes) != 0)
+					{
+						result = false;
+					}
+				}
+			}
 		}
 	}
 
-	if(!result)
+	if(result == false)
 	{
 		printf("*********FAIL*********\n");
 	}
@@ -86,7 +112,10 @@ void AlignmentTest ( pfHash hash, const int hashbits )
 		printf("PASS\n");
 	}
 
-	for(int i = 0; i < 16; i++) delete [] bufs[i];
+	delete [] hash1;
+	delete [] hash2;
+
+	return result;
 }
 
 //----------------------------------------------------------------------------
@@ -132,85 +161,4 @@ void AppendedZeroesTest ( pfHash hash, const int hashbits )
 	printf("PASS\n");
 }
 
-//----------------------------------------------------------------------------
-// Basic sanity checks -
-
-// A hash function should not be reading outside the bounds of the key.
-
-// Flipping a bit of a key should, with overwhelmingly high probability,
-// result in a different hash.
-
-// Hashing the same key twice should always produce the same result.
-
-bool SanityTest ( pfHash hash, const int hashbits )
-{
-	bool result = true;
-
-	const int hashbytes = hashbits/8;
-	const int reps = 100;
-
-	printf("Testing bit twiddling");
-
-	uint8_t buffer[256];
-	uint8_t * key = &buffer[64];
-
-	uint8_t * h1 = new uint8_t[hashbytes];
-	uint8_t * h2 = new uint8_t[hashbytes];
-
-	for(int irep = 0; irep < reps; irep++)
-	{
-		if(irep % (reps/10) == 0) printf(".");
-
-		for(int len = 1; len <= 128; len++)
-		{
-			// Generate a random key in the middle of the buffer, hash it,
-			// and then fill the space around the key with garbage. If a
-			// broken hash function reads past the ends of the key, it should
-			// fail the "did we get the same hash?" test below.
-
-			rand_p(key,len);
-			hash(key,len,0,h1);
-
-			rand_p(buffer,64);
-			rand_p(key+len,64);
-
-			// Flip a bit, hash the key -> we should get a different result.
-			// Flip it back, hash again -> we should get the same result.
-
-			for(int bit = 0; bit < (len * 8); bit++)
-			{
-				flipbit(key,len,bit);
-				hash(key,len,0,h2);
-
-				if(memcmp(h1,h2,hashbytes) == 0)
-				{
-					result = false;
-				}
-
-				flipbit(key,len,bit);
-				hash(key,len,0,h2);
-
-				if(memcmp(h1,h2,hashbytes) != 0)
-				{
-					result = false;
-				}
-			}
-		}
-	}
-
-	if(result == false)
-	{
-		printf("*********FAIL*********\n");
-	}
-	else
-	{
-		printf("PASS\n");
-	}
-
-	delete [] h1;
-	delete [] h2;
-
-	return result;
-}
-
 //-----------------------------------------------------------------------------
diff --git a/KeysetTest.h b/KeysetTest.h
index 53b61ec..7ef398c 100644
--- a/KeysetTest.h
+++ b/KeysetTest.h
@@ -20,6 +20,62 @@ void QuickBrownFox      ( pfHash hash, const int hashbits );
 void AlignmentTest      ( pfHash hash, const int hashbits );
 void AppendedZeroesTest ( pfHash hash, const int hashbits );
 
+//-----------------------------------------------------------------------------
+// Keyset 'Combination' - all possible combinations of input blocks
+
+template< typename hashtype >
+void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen, 
+							    uint32_t * blocks, int blockcount, 
+								pfHash hash, std::vector<hashtype> & hashes )
+{
+	if(len == maxlen) return;
+
+	for(int i = 0; i < blockcount; i++)
+	{
+		key[len] = blocks[i];
+	
+		//if(len == maxlen-1)
+		{
+			hashtype h;
+			hash(key,(len+1) * sizeof(uint32_t),0,&h);
+			hashes.push_back(h);
+		}
+
+		//else
+		{
+			CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes);
+		}
+	}
+}
+
+template< typename hashtype >
+bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
+{
+	printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);
+
+	//----------
+
+	std::vector<hashtype> hashes;
+
+	uint32_t * key = new uint32_t[maxlen];
+
+	CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes);
+
+	delete [] key;
+
+	printf("%d keys\n",(int)hashes.size());
+
+	//----------
+
+	bool result = true;
+
+	result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
+	
+	printf("\n");
+
+	return result;
+}
+
 //----------------------------------------------------------------------------
 // Keyset 'Permutation' - given a set of 32-bit blocks, generate keys
 // consisting of all possible permutations of those blocks
diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index c66bd87..75e0209 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -1,7 +1,12 @@
 #include "MurmurHash3.h"
-
 #include <stdlib.h>    // for _rotl
 
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+
 //-----------------------------------------------------------------------------
 // Block read - if your platform needs to do endian-swapping or can only
 // handle aligned reads, do the conversion here
@@ -12,43 +17,32 @@ inline uint32_t getblock ( const uint32_t * p, int i )
 }
 
 //----------
-// Finalization mix - force all bits of a hash block to avalanche
-
-// avalanches all bits to within 0.25% bias
-
-inline uint32_t fmix32 ( uint32_t h )
-{
-	h ^= h >> 16;
-	h *= 0x85ebca6b;
-	h ^= h >> 13;
-	h *= 0xc2b2ae35;
-	h ^= h >> 16;
-
-	return h;
-}
-
-//-----------------------------------------------------------------------------
 
 inline void bmix32 ( uint32_t & h1, uint32_t & k1, uint32_t & c1, uint32_t & c2 )
 {
+	c1 = c1*5+0x7b7d159c;
+	c2 = c2*5+0x6bce6396;
+
 	k1 *= c1; 
-	k1  = _rotl(k1,11); 
+	k1 = _rotl(k1,11); 
 	k1 *= c2;
-	h1 ^= k1;
-	
-	h1 = h1*3+0x52dce729;
 
-	c1 = c1*5+0x7b7d159c;
-	c2 = c2*5+0x6bce6396;
+	h1 = _rotl(h1,13);
+	h1 = h1*5+0x52dce729;
+	h1 ^= k1;
 }
 
 //----------
 
+//void MurmurHash3_x86_32 ( const void * key, int len, const void * seed, void * out )
 void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out )
 {
 	const uint8_t * data = (const uint8_t*)key;
 	const int nblocks = len / 4;
 
+	//uint32_t * s = (uint32_t*)seed;
+
+	//uint32_t h1 = 0x971e137b ^ s[0];
 	uint32_t h1 = 0x971e137b ^ seed;
 
 	uint32_t c1 = 0x95543787;
@@ -86,101 +80,18 @@ void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out )
 
 	h1 ^= len;
 
-	h1 = fmix32(h1);
+	h1 *= 0x85ebca6b;
+	h1 ^= h1 >> 13;
+	h1 *= 0xc2b2ae35;
+	h1 ^= h1 >> 16;
+
+	//h1 ^= s[0];
+	h1 ^= seed;
 
 	*(uint32_t*)out = h1;
 } 
 
 //-----------------------------------------------------------------------------
-
-inline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & k1, uint32_t & k2, uint32_t & c1, uint32_t & c2 )
-{
-	k1 *= c1; 
-	k1  = _rotl(k1,11); 
-	k1 *= c2;
-	h1 ^= k1;
-	h1 += h2;
-
-	h2 = _rotl(h2,17);
-
-	k2 *= c2; 
-	k2  = _rotl(k2,11);
-	k2 *= c1;
-	h2 ^= k2;
-	h2 += h1;
-
-	h1 = h1*3+0x52dce729;
-	h2 = h2*3+0x38495ab5;
-
-	c1 = c1*5+0x7b7d159c;
-	c2 = c2*5+0x6bce6396;
-}
-
-//----------
-
-void MurmurHash3_x86_64 ( const void * key, const int len, const uint32_t seed, void * out )
-{
-	const uint8_t * data = (const uint8_t*)key;
-	const int nblocks = len / 8;
-
-	uint32_t h1 = 0x8de1c3ac ^ seed;
-	uint32_t h2 = 0xbab98226 ^ seed;
-
-	uint32_t c1 = 0x95543787;
-	uint32_t c2 = 0x2ad7eb25;
-
-	//----------
-	// body
-
-	const uint32_t * blocks = (const uint32_t *)(data + nblocks*8);
-
-	for(int i = -nblocks; i; i++)
-	{
-		uint32_t k1 = getblock(blocks,i*2+0);
-		uint32_t k2 = getblock(blocks,i*2+1);
-
-		bmix32(h1,h2,k1,k2,c1,c2);
-	}
-
-	//----------
-	// tail
-	
-	const uint8_t * tail = (const uint8_t*)(data + nblocks*8);
-
-	uint32_t k1 = 0;
-	uint32_t k2 = 0;
-
-	switch(len & 7)
-	{
-	case 7: k2 ^= tail[6] << 16;
-	case 6: k2 ^= tail[5] << 8;
-	case 5: k2 ^= tail[4] << 0;
-	case 4: k1 ^= tail[3] << 24;
-	case 3: k1 ^= tail[2] << 16;
-	case 2: k1 ^= tail[1] << 8;
-	case 1: k1 ^= tail[0] << 0;
-	        bmix32(h1,h2,k1,k2,c1,c2);
-	};
-
-	//----------
-	// finalization
-
-	h2 ^= len;
-
-	h1 += h2;
-	h2 += h1;
-
-	h1 = fmix32(h1);
-	h2 = fmix32(h2);
-
-	h1 += h2;
-	h2 += h1;
-
-	((uint32_t*)out)[0] = h1;
-	((uint32_t*)out)[1] = h2;
-}
-
-//-----------------------------------------------------------------------------
 // This mix is large enough that VC++ refuses to inline it unless we use
 // __forceinline. It's also not all that fast due to register spillage.
 
@@ -230,12 +141,34 @@ __forceinline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & h3, uint32_
 }
 
 //----------
+// Finalization mix - force all bits of a hash block to avalanche
 
-void MurmurHash3_x86_128 ( const void * key, const int len, const uint32_t seed, void * out )
+// avalanches all bits to within 0.25% bias
+
+inline uint32_t fmix32 ( uint32_t h )
+{
+	h ^= h >> 16;
+	h *= 0x85ebca6b;
+	h ^= h >> 13;
+	h *= 0xc2b2ae35;
+	h ^= h >> 16;
+
+	return h;
+}
+
+//void MurmurHash3_x86_128 ( const void * key, const int len, const void * seed, void * out )
+void MurmurHash3_x86_128 ( const void * key, const int len, uint32_t seed, void * out )
 {
 	const uint8_t * data = (const uint8_t*)key;
 	const int nblocks = len / 16;
 
+	//uint32_t * s = (uint32_t*)(seed);
+
+	//uint32_t h1 = 0x8de1c3ac ^ s[0];
+	//uint32_t h2 = 0xbab98226 ^ s[1];
+	//uint32_t h3 = 0xfcba5b2d ^ s[2];
+	//uint32_t h4 = 0x32452e3e ^ s[3];
+
 	uint32_t h1 = 0x8de1c3ac ^ seed;
 	uint32_t h2 = 0xbab98226 ^ seed;
 	uint32_t h3 = 0xfcba5b2d ^ seed;
@@ -274,14 +207,17 @@ void MurmurHash3_x86_128 ( const void * key, const int len, const uint32_t seed,
 	case 15: k4 ^= tail[14] << 16;
 	case 14: k4 ^= tail[13] << 8;
 	case 13: k4 ^= tail[12] << 0;
+
 	case 12: k3 ^= tail[11] << 24;
 	case 11: k3 ^= tail[10] << 16;
 	case 10: k3 ^= tail[ 9] << 8;
 	case  9: k3 ^= tail[ 8] << 0;
+
 	case  8: k2 ^= tail[ 7] << 24;
 	case  7: k2 ^= tail[ 6] << 16;
 	case  6: k2 ^= tail[ 5] << 8;
 	case  5: k2 ^= tail[ 4] << 0;
+
 	case  4: k1 ^= tail[ 3] << 24;
 	case  3: k1 ^= tail[ 2] << 16;
 	case  2: k1 ^= tail[ 1] << 8;
@@ -328,16 +264,18 @@ inline void bmix64 ( uint64_t & h1, uint64_t & h2, uint64_t & k1, uint64_t & k2,
 	k1 *= c1; 
 	k1  = _rotl64(k1,23); 
 	k1 *= c2;
-	h1 ^= k1;
-	h1 += h2;
-
-	h2 = _rotl64(h2,41);
 
 	k2 *= c2; 
 	k2  = _rotl64(k2,23);
 	k2 *= c1;
-	h2 ^= k2;
+
+	h1 = _rotl64(h1,17);
+	h1 += h2;
+	h1 ^= k1;
+
+	h2 = _rotl64(h2,41);
 	h2 += h1;
+	h2 ^= k2;
 
 	h1 = h1*3+0x52dce729;
 	h2 = h2*3+0x38495ab5;
@@ -434,27 +372,3 @@ void MurmurHash3_x64_128 ( const void * key, const int len, const uint32_t seed,
 }
 
 //-----------------------------------------------------------------------------
-// If we need a smaller hash value, it's faster to just use a portion of the 
-// 128-bit hash
-
-void MurmurHash3_x64_32 ( const void * key, int len, uint32_t seed, void * out )
-{
-	uint32_t temp[4];
-
-	MurmurHash3_x64_128(key,len,seed,temp);
-
-	*(uint32_t*)out = temp[0];
-}
-
-//----------
-
-void MurmurHash3_x64_64 ( const void * key, int len, uint32_t seed, void * out )
-{
-	uint64_t temp[2];
-
-	MurmurHash3_x64_128(key,len,seed,temp);
-
-	*(uint64_t*)out = temp[0];
-} 
-
-//-----------------------------------------------------------------------------
diff --git a/MurmurHashAligned.cpp b/MurmurHashAligned.cpp
index 716dda6..63dfe61 100644
--- a/MurmurHashAligned.cpp
+++ b/MurmurHashAligned.cpp
@@ -1,2 +1,2 @@
-#include "stdafx.h"
+//#include "stdafx.h"
 
diff --git a/MurmurHashAligned2.cpp b/MurmurHashAligned2.cpp
index 23dced4..83e9723 100644
--- a/MurmurHashAligned2.cpp
+++ b/MurmurHashAligned2.cpp
@@ -1,4 +1,4 @@
-#include "stdafx.h"
+//#include "stdafx.h"
 
 #pragma warning(disable:4311)
 
diff --git a/MurmurHashNeutral2.cpp b/MurmurHashNeutral2.cpp
index 716dda6..63dfe61 100644
--- a/MurmurHashNeutral2.cpp
+++ b/MurmurHashNeutral2.cpp
@@ -1,2 +1,2 @@
-#include "stdafx.h"
+//#include "stdafx.h"
 
diff --git a/MurmurHashTest.cpp b/MurmurHashTest.cpp
index 6b18f53..3bc6a96 100644
--- a/MurmurHashTest.cpp
+++ b/MurmurHashTest.cpp
@@ -7,20 +7,3 @@ uint64_t MurmurHash64B      ( const void * key, int len, uint64_t seed );
 uint32_t MurmurHash2A       ( const void * key, int len, uint32_t seed );
 uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed );
 uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed );
-
-
-void MurmurHash1_test ( const void * key, int len, uint32_t seed, void * out )
-{
-	*(uint32_t*)out = MurmurHash1(key,len,seed);
-}
-
-void MurmurHash2_test ( const void * key, int len, uint32_t seed, void * out )
-{
-	*(uint32_t*)out = MurmurHash2(key,len,seed);
-}
-
-void MurmurHash2A_test ( const void * key, int len, uint32_t seed, void * out )
-{
-	*(uint32_t*)out = MurmurHash2A(key,len,seed);
-}
-
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index dbfadcb..d95acd4 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -48,3 +48,60 @@ void BulkSpeedTest ( pfHash hash )
 
 	delete [] block;
 }
+
+//-----------------------------------------------------------------------------
+
+void TinySpeedTest ( pfHash hash, int hashsize, int keysize, bool verbose, double & outCycles )
+{
+	const int trials = 100000;
+
+	if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
+
+	uint8_t * h = new uint8_t[hashsize];
+	uint8_t * k = new uint8_t[keysize];
+
+	double bestcycles = 1e9;
+
+	for(int itrial = 0; itrial < trials; itrial++)
+	{
+		__int64 begin,end;
+
+		rand_p(k,keysize);
+
+		begin = __rdtsc();
+		
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+		hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);   hash(k,keysize,itrial,h);
+
+		end = __rdtsc();
+
+		//blackhole(*(uint32_t*)(&h));
+
+		double cycles = double(end-begin) / 64;
+		if(cycles < bestcycles) bestcycles = cycles;
+	}
+
+	double bestbpc = double(keysize) / bestcycles;
+	if(verbose) printf("%8.2f cycles/hash, %8.4f bytes/cycle\n",bestcycles,bestbpc);
+
+	outCycles = bestcycles;
+}
+
+//-----------------------------------------------------------------------------
diff --git a/SpeedTest.h b/SpeedTest.h
index 5a5ed54..a8f0086 100644
--- a/SpeedTest.h
+++ b/SpeedTest.h
@@ -3,59 +3,6 @@
 #include "Types.h"
 
 void BulkSpeedTest ( pfHash hash );
-
-//----------------------------------------------------------------------------
-
-template < typename hashtype, int keysize >
-void TinySpeedTest ( pfHash hash )
-{
-	const int trials = 100000;
-
-	printf("Small key speed test - %4d-byte keys - ",keysize);
-
-	uint8_t k[keysize];
-	hashtype h;
-
-	double bestcycles = 1e9;
-
-	for(int itrial = 0; itrial < trials; itrial++)
-	{
-		__int64 begin,end;
-
-		rand_p(k,keysize);
-
-		begin = __rdtsc();
-		
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-		hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);   hash(k,keysize,itrial,&h);
-
-		end = __rdtsc();
-
-		blackhole(*(uint32_t*)(&h));
-
-		double cycles = double(end-begin) / 64;
-		if(cycles < bestcycles) bestcycles = cycles;
-	}
-
-	double bestbpc = double(keysize) / bestcycles;
-	printf("%8.2f cycles/hash, %8.4f bytes/cycle\n",bestcycles,bestbpc);
-}
+void TinySpeedTest ( pfHash hash, int hashsize, int keysize, bool verbose, double & outCycles );
 
 //-----------------------------------------------------------------------------
diff --git a/Types.h b/Types.h
index eb2a309..d1eae9f 100644
--- a/Types.h
+++ b/Types.h
@@ -233,5 +233,6 @@ private:
 };
 
 typedef Blob<128> uint128_t;
+typedef Blob<256> uint256_t;
 
 //-----------------------------------------------------------------------------
diff --git a/main.cpp b/main.cpp
index f5fcf04..36636ea 100644
--- a/main.cpp
+++ b/main.cpp
@@ -81,6 +81,7 @@ HashInfo g_hashes[] =
 	{ FNV,                  32, "FNV",         "Fowler-Noll-Vo hash, 32-bit" },
 	{ lookup3_test,         32, "lookup3",     "Bob Jenkins' lookup3" },
 	{ SuperFastHash,        32, "superfast",   "Paul Hsieh's SuperFastHash" },
+	{ MurmurOAAT,           32, "MurmurOAAT",  "Murmur one-at-a-time" },
 	
 	// MurmurHash2
 
@@ -92,11 +93,7 @@ HashInfo g_hashes[] =
 	// MurmurHash3
 
 	{ MurmurHash3_x86_32,   32, "Murmur3A",    "MurmurHash3 for x86, 32-bit" },
-	{ MurmurHash3_x86_64,   64, "Murmur3B",    "MurmurHash3 for x86, 64-bit" },
 	{ MurmurHash3_x86_128, 128, "Murmur3C",    "MurmurHash3 for x86, 128-bit" },
-
-	{ MurmurHash3_x64_32,   32, "Murmur3D",    "MurmurHash3 for x64, 32-bit" },
-	{ MurmurHash3_x64_64,   64, "Murmur3E",    "MurmurHash3 for x64, 64-bit" },
 	{ MurmurHash3_x64_128, 128, "Murmur3F",    "MurmurHash3 for x64, 128-bit" },
 
 };
@@ -130,7 +127,6 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
 
 		QuickBrownFox(hash,hashbits);
 		SanityTest(hash,hashbits);
-		AlignmentTest(hash,hashbits);
 		AppendedZeroesTest(hash,hashbits);
 		printf("\n");
 	}
@@ -145,12 +141,21 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
 		BulkSpeedTest(hash);
 		printf("\n");
 
-		TinySpeedTest<hashtype,4>(hash);
-		TinySpeedTest<hashtype,8>(hash);
-		TinySpeedTest<hashtype,16>(hash);
-		TinySpeedTest<hashtype,32>(hash);
-		TinySpeedTest<hashtype,64>(hash);
-		TinySpeedTest<hashtype,128>(hash);
+		for(int i = 1; i < 32; i++)
+		{
+			double cycles;
+
+			TinySpeedTest(hash,sizeof(hashtype),i,true,cycles);
+		}
+
+		for(int i = 32; i <= 2048; i += 32)
+		{
+			double cycles;
+
+
+			TinySpeedTest(hash,sizeof(hashtype),i,true,cycles);
+		}
+
 		printf("\n");
 	}
 
@@ -181,10 +186,44 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
 	{
 		printf("[[[ Avalanche Tests ]]]\n\n");
 
-		const int hashbits = sizeof(hashtype) * 8;
+		//const int hashbits = sizeof(hashtype) * 8;
 		bool result = true;
 
-		result &= AvalancheTest< Blob<hashbits * 2>, hashtype > (hash,2000000);
+		result &= AvalancheTest< Blob< 32>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob< 40>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob< 48>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob< 56>, hashtype > (hash,300000);
+
+		result &= AvalancheTest< Blob< 64>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob< 72>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob< 80>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob< 88>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob< 96>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<104>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<112>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<120>, hashtype > (hash,300000);
+
+		result &= AvalancheTest< Blob<128>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<136>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<144>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<152>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<160>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<168>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<176>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<184>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<192>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<200>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<208>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<216>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<224>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<232>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<240>, hashtype > (hash,300000);
+		result &= AvalancheTest< Blob<248>, hashtype > (hash,300000);
+
+		result &= AvalancheTest< Blob<256>, hashtype > (hash,300000);
+
+		//result &= AvalancheTest< Blob<hashbits * 2>, hashtype > (hash,200000);
+		//result &= AvalancheTest< Blob<768>, hashtype > (hash,2000000);
 
 		// The bit independence test is slow and not particularly useful...
 		//result &= BicTest < Blob<hashbits * 2>, hashtype > ( hash, 1000000 );
@@ -241,34 +280,139 @@ void test ( hashfunc<hashtype> hash, const char * hashname )
 
 	if(g_testPermutation || g_testAll)
 	{
-		printf("[[[ Keyset 'Permutation' Tests ]]]\n\n");
+		{
+			// This one breaks lookup3, surprisingly
 
-		bool result = true;
-		bool drawDiagram = false;
+			printf("[[[ Keyset 'Combination Lowbits' Tests ]]]\n\n");
+
+			bool result = true;
+			bool drawDiagram = false;
 
-		// This very sparse set of blocks is particularly hard for SuperFastHash
+			uint32_t blocks[] =
+			{
+				0x00000000, 
+				
+				0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
+			};
+
+			result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+			if(!result) printf("*********FAIL*********\n");
+			printf("\n");
+		}
 
-		uint32_t blocks[] =
 		{
-			0x00000000,
-			0x00000001,
-			0x00000002,
-			
-			0x00000400,
-			0x00008000,
-			
-			0x00080000,
-			0x00200000,
-
-			0x20000000,
-			0x40000000,
-			0x80000000,
-		};
-
-		result &= PermutationKeyTest<hashtype>(hash,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+			printf("[[[ Keyset 'Combination Highbits' Tests ]]]\n\n");
 
-		if(!result) printf("*********FAIL*********\n");
-		printf("\n");
+			bool result = true;
+			bool drawDiagram = false;
+
+			uint32_t blocks[] =
+			{
+				0x00000000, 
+				
+				0x20000000, 0x40000000, 0x60000000, 0x80000000, 0xA0000000, 0xC0000000, 0xE0000000
+			};
+
+			result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+			if(!result) printf("*********FAIL*********\n");
+			printf("\n");
+		}
+
+		{
+			printf("[[[ Keyset 'Combination 0x8000000' Tests ]]]\n\n");
+
+			bool result = true;
+			bool drawDiagram = false;
+
+			uint32_t blocks[] =
+			{
+				0x00000000, 
+				
+				0x80000000,
+			};
+
+			result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+			if(!result) printf("*********FAIL*********\n");
+			printf("\n");
+		}
+
+		{
+			printf("[[[ Keyset 'Combination 0x0000001' Tests ]]]\n\n");
+
+			bool result = true;
+			bool drawDiagram = false;
+
+			uint32_t blocks[] =
+			{
+				0x00000000, 
+				
+				0x00000001,
+			};
+
+			result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+			if(!result) printf("*********FAIL*********\n");
+			printf("\n");
+		}
+
+		{
+			printf("[[[ Keyset 'Combination Hi-Lo' Tests ]]]\n\n");
+
+			bool result = true;
+			bool drawDiagram = false;
+
+			uint32_t blocks[] =
+			{
+				0x00000000, 
+				
+				0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
+
+				0x80000000, 0x40000000, 0xC0000000, 0x20000000, 0xA0000000, 0x60000000, 0xE0000000
+			};
+
+			result &= CombinationKeyTest<hashtype>(hash,6,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+			if(!result) printf("*********FAIL*********\n");
+			printf("\n");
+		}
+
+		//----------
+
+		/*
+		{
+			printf("[[[ Keyset 'Permutation' Tests ]]]\n\n");
+
+			bool result = true;
+			bool drawDiagram = false;
+
+			// This very sparse set of blocks is particularly hard for SuperFastHash
+
+			uint32_t blocks[] =
+			{
+				0x00000000,
+				0x00000001,
+				0x00000002,
+				
+				0x00000400,
+				0x00008000,
+				
+				0x00080000,
+				0x00200000,
+
+				0x20000000,
+				0x40000000,
+				0x80000000,
+			};
+
+			result &= PermutationKeyTest<hashtype>(hash,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
+
+			if(!result) printf("*********FAIL*********\n");
+			printf("\n");
+		}
+		*/
 	}
 
 	//-----------------------------------------------------------------------------
@@ -370,6 +514,10 @@ void testHash ( const char * name )
 		{
 			test<uint128_t>( pInfo->hash, pInfo->desc );
 		}
+		else if(pInfo->hashbits == 256)
+		{
+			test<uint256_t>( pInfo->hash, pInfo->desc );
+		}
 		else
 		{
 			printf("Invalid hash bit width %d for hash '%s'",pInfo->hashbits,pInfo->name);
@@ -393,9 +541,11 @@ int main ( int argc, char ** argv )
 	//g_testSanity = true;
 	//g_testSpeed = true;
 	//g_testAvalanche = true;
+	//g_testCyclic = true;
 	//g_testDiff = true;
 	//g_testSparse = true;
 	//g_testPermutation = true;
+	//g_testZeroes = true;
 
 	//testHash("rand32");
 	//testHash("rand64");
@@ -408,22 +558,24 @@ int main ( int argc, char ** argv )
 	//printf("Called the hash function %I64d times, %I64d bytes hashed\n",g_hashcount,g_bytecount);
 
 	//testHash("crc32");
+	//testHash("rand128");
 
 	//testHash("fnv");
 	//testHash("superfast");
 	//testHash("lookup3");
+	//testHash("MurmurOAAT");
 
 	//testHash("murmur2");
 	//testHash("murmur2B");
 	//testHash("murmur2C");
 
-	testHash("murmur3a");
-	testHash("murmur3b");
-	testHash("murmur3c");
+	//testHash("murmur3a");
+	//testHash("murmur3b");
+	//testHash("murmur3c");
 
-	testHash("murmur3d");
-	testHash("murmur3e");
-	testHash("murmur3f");
+	//testHash("murmur3d");
+	//testHash("murmur3e");
+	//testHash("murmur3f");
 
 	//----------
 
@@ -432,4 +584,4 @@ int main ( int argc, char ** argv )
 	printf("time %d\n",b-a);
 
 	return 0;
-}
+}
+\ No newline at end of file
author	tanjent@gmail.com <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>	2011-02-28 06:03:12 +0000
committer	tanjent@gmail.com <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>	2011-02-28 06:03:12 +0000
commit	babb553380f2d223ebc4730830233c5d224963e8 (patch)
tree	e2cca8323914c05fc6c888841d581a0c7db0c384
parent	9d17d0b842230279e2a89b2e65a7dde05494d529 (diff)
download	src-babb553380f2d223ebc4730830233c5d224963e8.tar.gz