From babb553380f2d223ebc4730830233c5d224963e8 Mon Sep 17 00:00:00 2001 From: "tanjent@gmail.com" Date: Mon, 28 Feb 2011 06:03:12 +0000 Subject: Tweak Murmur3, new more rigorous sanity test, combinatorial block test git-svn-id: http://smhasher.googlecode.com/svn/trunk@76 77a7d1d3-4c08-bdc2-d393-d5859734b01a --- Bitvec.cpp | 14 +++ Bitvec.h | 1 + Hashes.cpp | 32 ++++++- Hashes.h | 4 + KeysetTest.cpp | 178 +++++++++++++----------------------- KeysetTest.h | 56 ++++++++++++ MurmurHash3.cpp | 200 ++++++++++++----------------------------- MurmurHashAligned.cpp | 2 +- MurmurHashAligned2.cpp | 2 +- MurmurHashNeutral2.cpp | 2 +- MurmurHashTest.cpp | 17 ---- SpeedTest.cpp | 57 ++++++++++++ SpeedTest.h | 55 +----------- Types.h | 1 + main.cpp | 238 ++++++++++++++++++++++++++++++++++++++++--------- 15 files changed, 480 insertions(+), 379 deletions(-) diff --git a/Bitvec.cpp b/Bitvec.cpp index 463c1e4..6939980 100644 --- a/Bitvec.cpp +++ b/Bitvec.cpp @@ -71,6 +71,20 @@ void printhex32 ( void * blob, int len ) printf("}"); } +void printbytes ( void * blob, int len ) +{ + uint8_t * d = (uint8_t*)blob; + + printf("{ "); + + for(int i = 0; i < len; i++) + { + printf("0x%02x, ",d[i]); + } + + printf(" };"); +} + //----------------------------------------------------------------------------- uint32_t getbit ( void * block, int len, uint32_t bit ) diff --git a/Bitvec.h b/Bitvec.h index fce97d5..1a475d1 100644 --- a/Bitvec.h +++ b/Bitvec.h @@ -8,6 +8,7 @@ void printbits ( void * blob, int len ); void printhex32 ( void * blob, int len ); +void printbytes ( void * blob, int len ); uint32_t getbit ( void * blob, int len, uint32_t bit ); uint32_t getbit_wrap ( void * blob, int len, uint32_t bit ); diff --git a/Hashes.cpp b/Hashes.cpp index f6787c6..cebd37f 100644 --- a/Hashes.cpp +++ b/Hashes.cpp @@ -2,6 +2,13 @@ #include "Random.h" + +#include +//#include +#include +#include +#include + //---------------------------------------------------------------------------- // fake / bad hashes @@ -54,6 +61,20 @@ void sumhash ( const void * key, int len, uint32_t seed, void * out ) *(uint32_t*)out = h; } +void sumhash32 ( const void * key, int len, uint32_t seed, void * out ) +{ + uint32_t h = seed; + + const uint32_t * data = (const uint32_t*)key; + + for(int i = 0; i < len/4; i++) + { + h += data[i]; + } + + *(uint32_t*)out = h; +} + void DoNothingHash ( const void *, int, uint32_t, void * ) { return; @@ -62,20 +83,23 @@ void DoNothingHash ( const void *, int, uint32_t, void * ) //----------------------------------------------------------------------------- // One-byte-at-a-time hash based on Murmur's mix -uint32_t MurmurOAAT ( const void * key, int len, uint32_t h ) +void MurmurOAAT ( const void * key, int len, uint32_t seed, void * out ) { const uint8_t * data = (const uint8_t*)key; - h ^= len; + uint32_t h = seed ^ len; for(int i = 0; i < len; i++) { h ^= data[i]; h *= 0x5bd1e995; - h ^= h >> 16; + h ^= h >> 15; } - return h; + h *= 0x5bd1e995; + h ^= h >> 15; + + *(uint32_t*)out = h; } //---------------------------------------------------------------------------- diff --git a/Hashes.h b/Hashes.h index 1aad04c..a580113 100644 --- a/Hashes.h +++ b/Hashes.h @@ -9,6 +9,9 @@ //---------- // These are _not_ hash functions (even though people tend to use crc32 as one...) +void sumhash ( const void * key, int len, uint32_t seed, void * out ); +void sumhash32 ( const void * key, int len, uint32_t seed, void * out ); + void DoNothingHash ( const void * key, int len, uint32_t seed, void * out ); void crc32 ( const void * key, int len, uint32_t seed, void * out ); @@ -28,6 +31,7 @@ void sha1_32a ( const void * key, int len, uint32_t seed, void * ou void FNV ( const void * key, int len, uint32_t seed, void * out ); void SuperFastHash ( const void * key, int len, uint32_t seed, void * out ); void lookup3_test ( const void * key, int len, uint32_t seed, void * out ); +void MurmurOAAT ( const void * key, int len, uint32_t seed, void * out ); //---------- // MurmurHash2 diff --git a/KeysetTest.cpp b/KeysetTest.cpp index 70b63de..6436826 100644 --- a/KeysetTest.cpp +++ b/KeysetTest.cpp @@ -29,55 +29,81 @@ void QuickBrownFox ( pfHash hash, const int hashbits ) } //---------------------------------------------------------------------------- -// Alignment of the keys should not affect the hash value - if it does, -// something is broken. +// Basic sanity checks - -void AlignmentTest ( pfHash hash, const int hashbits ) -{ - const int hashbytes = hashbits / 8; +// A hash function should not be reading outside the bounds of the key. - printf("Testing alignment handling.........."); +// Flipping a bit of a key should, with overwhelmingly high probability, +// result in a different hash. - char testblob[512]; - rand_p(testblob,512); +// Hashing the same key twice should always produce the same result. + +// The memory alignment of the key should not affect the hash result. - char * bufs[16]; - char * strings[16]; +bool SanityTest ( pfHash hash, const int hashbits ) +{ printf("Testing bit twiddling"); + + bool result = true; - for(int i = 0; i < 16; i++) + const int hashbytes = hashbits/8; + const int reps = 10; + const int keymax = 128; + const int pad = 16; + const int buflen = keymax + pad*3; + + uint8_t * buffer1 = new uint8_t[buflen]; + uint8_t * buffer2 = new uint8_t[buflen]; + + uint8_t * hash1 = new uint8_t[hashbytes]; + uint8_t * hash2 = new uint8_t[hashbytes]; + + //---------- + + for(int irep = 0; irep < reps; irep++) { - bufs[i] = new char[1024]; - uint32_t b = uint32_t(bufs[i]); + if(irep % (reps/10) == 0) printf("."); - b = (b+15)&(~15); + for(int len = 4; len <= keymax; len++) + { + for(int offset = pad; offset < pad*2; offset++) + { + uint8_t * key1 = &buffer1[pad]; + uint8_t * key2 = &buffer2[pad+offset]; - strings[i] = (char*)(b + i); - - memcpy(strings[i],testblob,512); - } + rand_p(buffer1,buflen); + rand_p(buffer2,buflen); - bool result = true; + memcpy(key2,key1,len); - uint32_t hash1[64]; - uint32_t hash2[64]; + hash(key1,len,0,hash1); - for(int k = 1; k <= 512; k++) - for(int j = 0; j < 15; j++) - for(int i = j+1; i < 16; i++) - { - const char * s1 = strings[i]; - const char * s2 = strings[j]; + for(int bit = 0; bit < (len * 8); bit++) + { + // Flip a bit, hash the key -> we should get a different result. - hash(s1,k,0,hash1); - hash(s2,k,0,hash2); + flipbit(key2,len,bit); + hash(key2,len,0,hash2); - if(memcmp(hash1,hash2,hashbytes) != 0) - { - result = false; + if(memcmp(hash1,hash2,hashbytes) == 0) + { + result = false; + } + + // Flip it back, hash again -> we should get the original result. + + flipbit(key2,len,bit); + hash(key2,len,0,hash2); + + if(memcmp(hash1,hash2,hashbytes) != 0) + { + result = false; + } + } + } } } - if(!result) + if(result == false) { printf("*********FAIL*********\n"); } @@ -86,7 +112,10 @@ void AlignmentTest ( pfHash hash, const int hashbits ) printf("PASS\n"); } - for(int i = 0; i < 16; i++) delete [] bufs[i]; + delete [] hash1; + delete [] hash2; + + return result; } //---------------------------------------------------------------------------- @@ -132,85 +161,4 @@ void AppendedZeroesTest ( pfHash hash, const int hashbits ) printf("PASS\n"); } -//---------------------------------------------------------------------------- -// Basic sanity checks - - -// A hash function should not be reading outside the bounds of the key. - -// Flipping a bit of a key should, with overwhelmingly high probability, -// result in a different hash. - -// Hashing the same key twice should always produce the same result. - -bool SanityTest ( pfHash hash, const int hashbits ) -{ - bool result = true; - - const int hashbytes = hashbits/8; - const int reps = 100; - - printf("Testing bit twiddling"); - - uint8_t buffer[256]; - uint8_t * key = &buffer[64]; - - uint8_t * h1 = new uint8_t[hashbytes]; - uint8_t * h2 = new uint8_t[hashbytes]; - - for(int irep = 0; irep < reps; irep++) - { - if(irep % (reps/10) == 0) printf("."); - - for(int len = 1; len <= 128; len++) - { - // Generate a random key in the middle of the buffer, hash it, - // and then fill the space around the key with garbage. If a - // broken hash function reads past the ends of the key, it should - // fail the "did we get the same hash?" test below. - - rand_p(key,len); - hash(key,len,0,h1); - - rand_p(buffer,64); - rand_p(key+len,64); - - // Flip a bit, hash the key -> we should get a different result. - // Flip it back, hash again -> we should get the same result. - - for(int bit = 0; bit < (len * 8); bit++) - { - flipbit(key,len,bit); - hash(key,len,0,h2); - - if(memcmp(h1,h2,hashbytes) == 0) - { - result = false; - } - - flipbit(key,len,bit); - hash(key,len,0,h2); - - if(memcmp(h1,h2,hashbytes) != 0) - { - result = false; - } - } - } - } - - if(result == false) - { - printf("*********FAIL*********\n"); - } - else - { - printf("PASS\n"); - } - - delete [] h1; - delete [] h2; - - return result; -} - //----------------------------------------------------------------------------- diff --git a/KeysetTest.h b/KeysetTest.h index 53b61ec..7ef398c 100644 --- a/KeysetTest.h +++ b/KeysetTest.h @@ -20,6 +20,62 @@ void QuickBrownFox ( pfHash hash, const int hashbits ); void AlignmentTest ( pfHash hash, const int hashbits ); void AppendedZeroesTest ( pfHash hash, const int hashbits ); +//----------------------------------------------------------------------------- +// Keyset 'Combination' - all possible combinations of input blocks + +template< typename hashtype > +void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen, + uint32_t * blocks, int blockcount, + pfHash hash, std::vector & hashes ) +{ + if(len == maxlen) return; + + for(int i = 0; i < blockcount; i++) + { + key[len] = blocks[i]; + + //if(len == maxlen-1) + { + hashtype h; + hash(key,(len+1) * sizeof(uint32_t),0,&h); + hashes.push_back(h); + } + + //else + { + CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes); + } + } +} + +template< typename hashtype > +bool CombinationKeyTest ( hashfunc hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram ) +{ + printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount); + + //---------- + + std::vector hashes; + + uint32_t * key = new uint32_t[maxlen]; + + CombinationKeygenRecurse(key,0,maxlen,blocks,blockcount,hash,hashes); + + delete [] key; + + printf("%d keys\n",(int)hashes.size()); + + //---------- + + bool result = true; + + result &= TestHashList(hashes,testColl,testDist,drawDiagram); + + printf("\n"); + + return result; +} + //---------------------------------------------------------------------------- // Keyset 'Permutation' - given a set of 32-bit blocks, generate keys // consisting of all possible permutations of those blocks diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp index c66bd87..75e0209 100644 --- a/MurmurHash3.cpp +++ b/MurmurHash3.cpp @@ -1,7 +1,12 @@ #include "MurmurHash3.h" - #include // for _rotl +// Note - The x86 and x64 versions do _not_ produce the same results, as the +// algorithms are optimized for their respective platforms. You can still +// compile and run any of them on any platform, but your performance with the +// non-native version will be less than optimal. + + //----------------------------------------------------------------------------- // Block read - if your platform needs to do endian-swapping or can only // handle aligned reads, do the conversion here @@ -12,43 +17,32 @@ inline uint32_t getblock ( const uint32_t * p, int i ) } //---------- -// Finalization mix - force all bits of a hash block to avalanche - -// avalanches all bits to within 0.25% bias - -inline uint32_t fmix32 ( uint32_t h ) -{ - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - - return h; -} - -//----------------------------------------------------------------------------- inline void bmix32 ( uint32_t & h1, uint32_t & k1, uint32_t & c1, uint32_t & c2 ) { + c1 = c1*5+0x7b7d159c; + c2 = c2*5+0x6bce6396; + k1 *= c1; - k1 = _rotl(k1,11); + k1 = _rotl(k1,11); k1 *= c2; - h1 ^= k1; - - h1 = h1*3+0x52dce729; - c1 = c1*5+0x7b7d159c; - c2 = c2*5+0x6bce6396; + h1 = _rotl(h1,13); + h1 = h1*5+0x52dce729; + h1 ^= k1; } //---------- +//void MurmurHash3_x86_32 ( const void * key, int len, const void * seed, void * out ) void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out ) { const uint8_t * data = (const uint8_t*)key; const int nblocks = len / 4; + //uint32_t * s = (uint32_t*)seed; + + //uint32_t h1 = 0x971e137b ^ s[0]; uint32_t h1 = 0x971e137b ^ seed; uint32_t c1 = 0x95543787; @@ -86,100 +80,17 @@ void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out ) h1 ^= len; - h1 = fmix32(h1); + h1 *= 0x85ebca6b; + h1 ^= h1 >> 13; + h1 *= 0xc2b2ae35; + h1 ^= h1 >> 16; + + //h1 ^= s[0]; + h1 ^= seed; *(uint32_t*)out = h1; } -//----------------------------------------------------------------------------- - -inline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & k1, uint32_t & k2, uint32_t & c1, uint32_t & c2 ) -{ - k1 *= c1; - k1 = _rotl(k1,11); - k1 *= c2; - h1 ^= k1; - h1 += h2; - - h2 = _rotl(h2,17); - - k2 *= c2; - k2 = _rotl(k2,11); - k2 *= c1; - h2 ^= k2; - h2 += h1; - - h1 = h1*3+0x52dce729; - h2 = h2*3+0x38495ab5; - - c1 = c1*5+0x7b7d159c; - c2 = c2*5+0x6bce6396; -} - -//---------- - -void MurmurHash3_x86_64 ( const void * key, const int len, const uint32_t seed, void * out ) -{ - const uint8_t * data = (const uint8_t*)key; - const int nblocks = len / 8; - - uint32_t h1 = 0x8de1c3ac ^ seed; - uint32_t h2 = 0xbab98226 ^ seed; - - uint32_t c1 = 0x95543787; - uint32_t c2 = 0x2ad7eb25; - - //---------- - // body - - const uint32_t * blocks = (const uint32_t *)(data + nblocks*8); - - for(int i = -nblocks; i; i++) - { - uint32_t k1 = getblock(blocks,i*2+0); - uint32_t k2 = getblock(blocks,i*2+1); - - bmix32(h1,h2,k1,k2,c1,c2); - } - - //---------- - // tail - - const uint8_t * tail = (const uint8_t*)(data + nblocks*8); - - uint32_t k1 = 0; - uint32_t k2 = 0; - - switch(len & 7) - { - case 7: k2 ^= tail[6] << 16; - case 6: k2 ^= tail[5] << 8; - case 5: k2 ^= tail[4] << 0; - case 4: k1 ^= tail[3] << 24; - case 3: k1 ^= tail[2] << 16; - case 2: k1 ^= tail[1] << 8; - case 1: k1 ^= tail[0] << 0; - bmix32(h1,h2,k1,k2,c1,c2); - }; - - //---------- - // finalization - - h2 ^= len; - - h1 += h2; - h2 += h1; - - h1 = fmix32(h1); - h2 = fmix32(h2); - - h1 += h2; - h2 += h1; - - ((uint32_t*)out)[0] = h1; - ((uint32_t*)out)[1] = h2; -} - //----------------------------------------------------------------------------- // This mix is large enough that VC++ refuses to inline it unless we use // __forceinline. It's also not all that fast due to register spillage. @@ -230,12 +141,34 @@ __forceinline void bmix32 ( uint32_t & h1, uint32_t & h2, uint32_t & h3, uint32_ } //---------- +// Finalization mix - force all bits of a hash block to avalanche -void MurmurHash3_x86_128 ( const void * key, const int len, const uint32_t seed, void * out ) +// avalanches all bits to within 0.25% bias + +inline uint32_t fmix32 ( uint32_t h ) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +//void MurmurHash3_x86_128 ( const void * key, const int len, const void * seed, void * out ) +void MurmurHash3_x86_128 ( const void * key, const int len, uint32_t seed, void * out ) { const uint8_t * data = (const uint8_t*)key; const int nblocks = len / 16; + //uint32_t * s = (uint32_t*)(seed); + + //uint32_t h1 = 0x8de1c3ac ^ s[0]; + //uint32_t h2 = 0xbab98226 ^ s[1]; + //uint32_t h3 = 0xfcba5b2d ^ s[2]; + //uint32_t h4 = 0x32452e3e ^ s[3]; + uint32_t h1 = 0x8de1c3ac ^ seed; uint32_t h2 = 0xbab98226 ^ seed; uint32_t h3 = 0xfcba5b2d ^ seed; @@ -274,14 +207,17 @@ void MurmurHash3_x86_128 ( const void * key, const int len, const uint32_t seed, case 15: k4 ^= tail[14] << 16; case 14: k4 ^= tail[13] << 8; case 13: k4 ^= tail[12] << 0; + case 12: k3 ^= tail[11] << 24; case 11: k3 ^= tail[10] << 16; case 10: k3 ^= tail[ 9] << 8; case 9: k3 ^= tail[ 8] << 0; + case 8: k2 ^= tail[ 7] << 24; case 7: k2 ^= tail[ 6] << 16; case 6: k2 ^= tail[ 5] << 8; case 5: k2 ^= tail[ 4] << 0; + case 4: k1 ^= tail[ 3] << 24; case 3: k1 ^= tail[ 2] << 16; case 2: k1 ^= tail[ 1] << 8; @@ -328,16 +264,18 @@ inline void bmix64 ( uint64_t & h1, uint64_t & h2, uint64_t & k1, uint64_t & k2, k1 *= c1; k1 = _rotl64(k1,23); k1 *= c2; - h1 ^= k1; - h1 += h2; - - h2 = _rotl64(h2,41); k2 *= c2; k2 = _rotl64(k2,23); k2 *= c1; - h2 ^= k2; + + h1 = _rotl64(h1,17); + h1 += h2; + h1 ^= k1; + + h2 = _rotl64(h2,41); h2 += h1; + h2 ^= k2; h1 = h1*3+0x52dce729; h2 = h2*3+0x38495ab5; @@ -434,27 +372,3 @@ void MurmurHash3_x64_128 ( const void * key, const int len, const uint32_t seed, } //----------------------------------------------------------------------------- -// If we need a smaller hash value, it's faster to just use a portion of the -// 128-bit hash - -void MurmurHash3_x64_32 ( const void * key, int len, uint32_t seed, void * out ) -{ - uint32_t temp[4]; - - MurmurHash3_x64_128(key,len,seed,temp); - - *(uint32_t*)out = temp[0]; -} - -//---------- - -void MurmurHash3_x64_64 ( const void * key, int len, uint32_t seed, void * out ) -{ - uint64_t temp[2]; - - MurmurHash3_x64_128(key,len,seed,temp); - - *(uint64_t*)out = temp[0]; -} - -//----------------------------------------------------------------------------- diff --git a/MurmurHashAligned.cpp b/MurmurHashAligned.cpp index 716dda6..63dfe61 100644 --- a/MurmurHashAligned.cpp +++ b/MurmurHashAligned.cpp @@ -1,2 +1,2 @@ -#include "stdafx.h" +//#include "stdafx.h" diff --git a/MurmurHashAligned2.cpp b/MurmurHashAligned2.cpp index 23dced4..83e9723 100644 --- a/MurmurHashAligned2.cpp +++ b/MurmurHashAligned2.cpp @@ -1,4 +1,4 @@ -#include "stdafx.h" +//#include "stdafx.h" #pragma warning(disable:4311) diff --git a/MurmurHashNeutral2.cpp b/MurmurHashNeutral2.cpp index 716dda6..63dfe61 100644 --- a/MurmurHashNeutral2.cpp +++ b/MurmurHashNeutral2.cpp @@ -1,2 +1,2 @@ -#include "stdafx.h" +//#include "stdafx.h" diff --git a/MurmurHashTest.cpp b/MurmurHashTest.cpp index 6b18f53..3bc6a96 100644 --- a/MurmurHashTest.cpp +++ b/MurmurHashTest.cpp @@ -7,20 +7,3 @@ uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed ); uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed ); uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed ); uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed ); - - -void MurmurHash1_test ( const void * key, int len, uint32_t seed, void * out ) -{ - *(uint32_t*)out = MurmurHash1(key,len,seed); -} - -void MurmurHash2_test ( const void * key, int len, uint32_t seed, void * out ) -{ - *(uint32_t*)out = MurmurHash2(key,len,seed); -} - -void MurmurHash2A_test ( const void * key, int len, uint32_t seed, void * out ) -{ - *(uint32_t*)out = MurmurHash2A(key,len,seed); -} - diff --git a/SpeedTest.cpp b/SpeedTest.cpp index dbfadcb..d95acd4 100644 --- a/SpeedTest.cpp +++ b/SpeedTest.cpp @@ -48,3 +48,60 @@ void BulkSpeedTest ( pfHash hash ) delete [] block; } + +//----------------------------------------------------------------------------- + +void TinySpeedTest ( pfHash hash, int hashsize, int keysize, bool verbose, double & outCycles ) +{ + const int trials = 100000; + + if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize); + + uint8_t * h = new uint8_t[hashsize]; + uint8_t * k = new uint8_t[keysize]; + + double bestcycles = 1e9; + + for(int itrial = 0; itrial < trials; itrial++) + { + __int64 begin,end; + + rand_p(k,keysize); + + begin = __rdtsc(); + + hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); + hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); + hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); + hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); + + hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); + hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); + hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); + hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); + + hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); + hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); + hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); + hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); + + hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); + hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); + hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); + hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); hash(k,keysize,itrial,h); + + end = __rdtsc(); + + //blackhole(*(uint32_t*)(&h)); + + double cycles = double(end-begin) / 64; + if(cycles < bestcycles) bestcycles = cycles; + } + + double bestbpc = double(keysize) / bestcycles; + if(verbose) printf("%8.2f cycles/hash, %8.4f bytes/cycle\n",bestcycles,bestbpc); + + outCycles = bestcycles; +} + +//----------------------------------------------------------------------------- diff --git a/SpeedTest.h b/SpeedTest.h index 5a5ed54..a8f0086 100644 --- a/SpeedTest.h +++ b/SpeedTest.h @@ -3,59 +3,6 @@ #include "Types.h" void BulkSpeedTest ( pfHash hash ); - -//---------------------------------------------------------------------------- - -template < typename hashtype, int keysize > -void TinySpeedTest ( pfHash hash ) -{ - const int trials = 100000; - - printf("Small key speed test - %4d-byte keys - ",keysize); - - uint8_t k[keysize]; - hashtype h; - - double bestcycles = 1e9; - - for(int itrial = 0; itrial < trials; itrial++) - { - __int64 begin,end; - - rand_p(k,keysize); - - begin = __rdtsc(); - - hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); - hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); - hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); - hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); - - hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); - hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); - hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); - hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); - - hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); - hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); - hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); - hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); - - hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); - hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); - hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); - hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); hash(k,keysize,itrial,&h); - - end = __rdtsc(); - - blackhole(*(uint32_t*)(&h)); - - double cycles = double(end-begin) / 64; - if(cycles < bestcycles) bestcycles = cycles; - } - - double bestbpc = double(keysize) / bestcycles; - printf("%8.2f cycles/hash, %8.4f bytes/cycle\n",bestcycles,bestbpc); -} +void TinySpeedTest ( pfHash hash, int hashsize, int keysize, bool verbose, double & outCycles ); //----------------------------------------------------------------------------- diff --git a/Types.h b/Types.h index eb2a309..d1eae9f 100644 --- a/Types.h +++ b/Types.h @@ -233,5 +233,6 @@ private: }; typedef Blob<128> uint128_t; +typedef Blob<256> uint256_t; //----------------------------------------------------------------------------- diff --git a/main.cpp b/main.cpp index f5fcf04..36636ea 100644 --- a/main.cpp +++ b/main.cpp @@ -81,6 +81,7 @@ HashInfo g_hashes[] = { FNV, 32, "FNV", "Fowler-Noll-Vo hash, 32-bit" }, { lookup3_test, 32, "lookup3", "Bob Jenkins' lookup3" }, { SuperFastHash, 32, "superfast", "Paul Hsieh's SuperFastHash" }, + { MurmurOAAT, 32, "MurmurOAAT", "Murmur one-at-a-time" }, // MurmurHash2 @@ -92,11 +93,7 @@ HashInfo g_hashes[] = // MurmurHash3 { MurmurHash3_x86_32, 32, "Murmur3A", "MurmurHash3 for x86, 32-bit" }, - { MurmurHash3_x86_64, 64, "Murmur3B", "MurmurHash3 for x86, 64-bit" }, { MurmurHash3_x86_128, 128, "Murmur3C", "MurmurHash3 for x86, 128-bit" }, - - { MurmurHash3_x64_32, 32, "Murmur3D", "MurmurHash3 for x64, 32-bit" }, - { MurmurHash3_x64_64, 64, "Murmur3E", "MurmurHash3 for x64, 64-bit" }, { MurmurHash3_x64_128, 128, "Murmur3F", "MurmurHash3 for x64, 128-bit" }, }; @@ -130,7 +127,6 @@ void test ( hashfunc hash, const char * hashname ) QuickBrownFox(hash,hashbits); SanityTest(hash,hashbits); - AlignmentTest(hash,hashbits); AppendedZeroesTest(hash,hashbits); printf("\n"); } @@ -145,12 +141,21 @@ void test ( hashfunc hash, const char * hashname ) BulkSpeedTest(hash); printf("\n"); - TinySpeedTest(hash); - TinySpeedTest(hash); - TinySpeedTest(hash); - TinySpeedTest(hash); - TinySpeedTest(hash); - TinySpeedTest(hash); + for(int i = 1; i < 32; i++) + { + double cycles; + + TinySpeedTest(hash,sizeof(hashtype),i,true,cycles); + } + + for(int i = 32; i <= 2048; i += 32) + { + double cycles; + + + TinySpeedTest(hash,sizeof(hashtype),i,true,cycles); + } + printf("\n"); } @@ -181,10 +186,44 @@ void test ( hashfunc hash, const char * hashname ) { printf("[[[ Avalanche Tests ]]]\n\n"); - const int hashbits = sizeof(hashtype) * 8; + //const int hashbits = sizeof(hashtype) * 8; bool result = true; - result &= AvalancheTest< Blob, hashtype > (hash,2000000); + result &= AvalancheTest< Blob< 32>, hashtype > (hash,300000); + result &= AvalancheTest< Blob< 40>, hashtype > (hash,300000); + result &= AvalancheTest< Blob< 48>, hashtype > (hash,300000); + result &= AvalancheTest< Blob< 56>, hashtype > (hash,300000); + + result &= AvalancheTest< Blob< 64>, hashtype > (hash,300000); + result &= AvalancheTest< Blob< 72>, hashtype > (hash,300000); + result &= AvalancheTest< Blob< 80>, hashtype > (hash,300000); + result &= AvalancheTest< Blob< 88>, hashtype > (hash,300000); + result &= AvalancheTest< Blob< 96>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<104>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<112>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<120>, hashtype > (hash,300000); + + result &= AvalancheTest< Blob<128>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<136>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<144>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<152>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<160>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<168>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<176>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<184>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<192>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<200>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<208>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<216>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<224>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<232>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<240>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<248>, hashtype > (hash,300000); + + result &= AvalancheTest< Blob<256>, hashtype > (hash,300000); + + //result &= AvalancheTest< Blob, hashtype > (hash,200000); + //result &= AvalancheTest< Blob<768>, hashtype > (hash,2000000); // The bit independence test is slow and not particularly useful... //result &= BicTest < Blob, hashtype > ( hash, 1000000 ); @@ -241,34 +280,139 @@ void test ( hashfunc hash, const char * hashname ) if(g_testPermutation || g_testAll) { - printf("[[[ Keyset 'Permutation' Tests ]]]\n\n"); + { + // This one breaks lookup3, surprisingly - bool result = true; - bool drawDiagram = false; + printf("[[[ Keyset 'Combination Lowbits' Tests ]]]\n\n"); + + bool result = true; + bool drawDiagram = false; - // This very sparse set of blocks is particularly hard for SuperFastHash + uint32_t blocks[] = + { + 0x00000000, + + 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007, + }; + + result &= CombinationKeyTest(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } - uint32_t blocks[] = { - 0x00000000, - 0x00000001, - 0x00000002, - - 0x00000400, - 0x00008000, - - 0x00080000, - 0x00200000, - - 0x20000000, - 0x40000000, - 0x80000000, - }; - - result &= PermutationKeyTest(hash,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram); + printf("[[[ Keyset 'Combination Highbits' Tests ]]]\n\n"); - if(!result) printf("*********FAIL*********\n"); - printf("\n"); + bool result = true; + bool drawDiagram = false; + + uint32_t blocks[] = + { + 0x00000000, + + 0x20000000, 0x40000000, 0x60000000, 0x80000000, 0xA0000000, 0xC0000000, 0xE0000000 + }; + + result &= CombinationKeyTest(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + + { + printf("[[[ Keyset 'Combination 0x8000000' Tests ]]]\n\n"); + + bool result = true; + bool drawDiagram = false; + + uint32_t blocks[] = + { + 0x00000000, + + 0x80000000, + }; + + result &= CombinationKeyTest(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + + { + printf("[[[ Keyset 'Combination 0x0000001' Tests ]]]\n\n"); + + bool result = true; + bool drawDiagram = false; + + uint32_t blocks[] = + { + 0x00000000, + + 0x00000001, + }; + + result &= CombinationKeyTest(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + + { + printf("[[[ Keyset 'Combination Hi-Lo' Tests ]]]\n\n"); + + bool result = true; + bool drawDiagram = false; + + uint32_t blocks[] = + { + 0x00000000, + + 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007, + + 0x80000000, 0x40000000, 0xC0000000, 0x20000000, 0xA0000000, 0x60000000, 0xE0000000 + }; + + result &= CombinationKeyTest(hash,6,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + + //---------- + + /* + { + printf("[[[ Keyset 'Permutation' Tests ]]]\n\n"); + + bool result = true; + bool drawDiagram = false; + + // This very sparse set of blocks is particularly hard for SuperFastHash + + uint32_t blocks[] = + { + 0x00000000, + 0x00000001, + 0x00000002, + + 0x00000400, + 0x00008000, + + 0x00080000, + 0x00200000, + + 0x20000000, + 0x40000000, + 0x80000000, + }; + + result &= PermutationKeyTest(hash,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + */ } //----------------------------------------------------------------------------- @@ -370,6 +514,10 @@ void testHash ( const char * name ) { test( pInfo->hash, pInfo->desc ); } + else if(pInfo->hashbits == 256) + { + test( pInfo->hash, pInfo->desc ); + } else { printf("Invalid hash bit width %d for hash '%s'",pInfo->hashbits,pInfo->name); @@ -393,9 +541,11 @@ int main ( int argc, char ** argv ) //g_testSanity = true; //g_testSpeed = true; //g_testAvalanche = true; + //g_testCyclic = true; //g_testDiff = true; //g_testSparse = true; //g_testPermutation = true; + //g_testZeroes = true; //testHash("rand32"); //testHash("rand64"); @@ -408,22 +558,24 @@ int main ( int argc, char ** argv ) //printf("Called the hash function %I64d times, %I64d bytes hashed\n",g_hashcount,g_bytecount); //testHash("crc32"); + //testHash("rand128"); //testHash("fnv"); //testHash("superfast"); //testHash("lookup3"); + //testHash("MurmurOAAT"); //testHash("murmur2"); //testHash("murmur2B"); //testHash("murmur2C"); - testHash("murmur3a"); - testHash("murmur3b"); - testHash("murmur3c"); + //testHash("murmur3a"); + //testHash("murmur3b"); + //testHash("murmur3c"); - testHash("murmur3d"); - testHash("murmur3e"); - testHash("murmur3f"); + //testHash("murmur3d"); + //testHash("murmur3e"); + //testHash("murmur3f"); //---------- @@ -432,4 +584,4 @@ int main ( int argc, char ** argv ) printf("time %d\n",b-a); return 0; -} +} \ No newline at end of file -- cgit v1.2.3