summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortanjent@gmail.com <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>2011-04-04 23:05:26 +0000
committertanjent@gmail.com <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>2011-04-04 23:05:26 +0000
commit0f37bbdda0d33613c84af0d1b786751c1cc99316 (patch)
tree2f61b455f4d9c93381d8e3f95fa239909458aa75
parent6bde2782cde84aea2002eed32ffd12c49d72544d (diff)
downloadsrc-0f37bbdda0d33613c84af0d1b786751c1cc99316.tar.gz
some test code for collision reporting
cleanup murmur3, fix len-collision issue make main thread high priority on windows fix missing typecast in SpeedTest.cpp, increase bulk speed test reps remove reference to old file git-svn-id: http://smhasher.googlecode.com/svn/trunk@120 77a7d1d3-4c08-bdc2-d393-d5859734b01a
-rw-r--r--KeysetTest.cpp63
-rw-r--r--MurmurHash3.cpp83
-rw-r--r--Platform.cpp1
-rw-r--r--SMHasher.vcproj4
-rw-r--r--SpeedTest.cpp6
-rw-r--r--main.cpp4
6 files changed, 99 insertions, 62 deletions
diff --git a/KeysetTest.cpp b/KeysetTest.cpp
index f11d512..1cae57d 100644
--- a/KeysetTest.cpp
+++ b/KeysetTest.cpp
@@ -262,3 +262,66 @@ void TwoBytesKeygen ( int maxlen, KeyCallback & c )
}
//-----------------------------------------------------------------------------
+
+template< typename hashtype >
+void DumpCollisionMap ( CollisionMap<hashtype,ByteVec> & cmap )
+{
+ typedef CollisionMap<hashtype,ByteVec> cmap_t;
+
+ for(cmap_t::iterator it = cmap.begin(); it != cmap.end(); ++it)
+ {
+ const hashtype & hash = (*it).first;
+
+ printf("Hash - ");
+ printbytes(&hash,sizeof(hashtype));
+ printf("\n");
+
+ std::vector<ByteVec> & keys = (*it).second;
+
+ for(int i = 0; i < (int)keys.size(); i++)
+ {
+ ByteVec & key = keys[i];
+
+ printf("Key - ");
+ printbytes(&key[0],(int)key.size());
+ printf("\n");
+ }
+ printf("\n");
+ }
+
+}
+
+// test code
+
+void ReportCollisions ( pfHash hash )
+{
+ printf("Hashing keyset\n");
+
+ std::vector<uint128_t> hashes;
+
+ HashCallback<uint128_t> c(hash,hashes);
+
+ TwoBytesKeygen(20,c);
+
+ printf("%d hashes\n",hashes.size());
+
+ printf("Finding collisions\n");
+
+ HashSet<uint128_t> collisions;
+
+ FindCollisions(hashes,collisions,1000);
+
+ printf("%d collisions\n",collisions.size());
+
+ printf("Mapping collisions\n");
+
+ CollisionMap<uint128_t,ByteVec> cmap;
+
+ CollisionCallback<uint128_t> c2(hash,collisions,cmap);
+
+ TwoBytesKeygen(20,c2);
+
+ printf("Dumping collisions\n");
+
+ DumpCollisionMap(cmap);
+} \ No newline at end of file
diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp
index 52c4043..8f63772 100644
--- a/MurmurHash3.cpp
+++ b/MurmurHash3.cpp
@@ -25,53 +25,6 @@ FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
}
//-----------------------------------------------------------------------------
-// Block mix - mix the key block, combine with hash block, mix the hash block,
-// repeat.
-
-FORCE_INLINE void bmix ( uint32_t & h1, uint32_t & k1,
- uint32_t & c1, uint32_t & c2 )
-{
- k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
-
- h1 = ROTL32(h1,13); h1 = h1*5+0xe6546b64;
-}
-
-//----------
-
-FORCE_INLINE void bmix ( uint64_t & h1, uint64_t & h2,
- uint64_t & k1, uint64_t & k2,
- uint64_t & c1, uint64_t & c2 )
-{
- k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
-
- h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
-
- k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
-
- h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
-}
-
-//----------
-
-FORCE_INLINE void bmix ( uint32_t & h1, uint32_t & h2,
- uint32_t & h3, uint32_t & h4,
- uint32_t & k1, uint32_t & k2,
- uint32_t & k3, uint32_t & k4,
- uint32_t & c1, uint32_t & c2,
- uint32_t & c3, uint32_t & c4 )
-{
- k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
- k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
- k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
- k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
-
- h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
- h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
- h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
- h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
-}
-
-//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche
FORCE_INLINE uint32_t fmix ( uint32_t h )
@@ -120,7 +73,13 @@ void MurmurHash3_x86_32 ( const void * key, int len,
{
uint32_t k1 = getblock(blocks,i);
- bmix(h1,k1,c1,c2);
+ k1 *= c1;
+ k1 = ROTL32(k1,15);
+ k1 *= c2;
+
+ h1 ^= k1;
+ h1 = ROTL32(h1,13);
+ h1 = h1*5+0xe6546b64;
}
//----------
@@ -178,7 +137,21 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
uint32_t k3 = getblock(blocks,i*4+2);
uint32_t k4 = getblock(blocks,i*4+3);
- bmix(h1,h2,h3,h4, k1,k2,k3,k4, c1, c2, c3, c4);
+ k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+
+ h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
+
+ k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+ h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
+
+ k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+ h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
+
+ k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+ h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
}
//----------
@@ -220,7 +193,7 @@ void MurmurHash3_x86_128 ( const void * key, const int len,
//----------
// finalization
- h4 ^= len;
+ h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
h1 += h2; h1 += h3; h1 += h4;
h2 += h1; h3 += h1; h4 += h1;
@@ -263,7 +236,13 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
uint64_t k1 = getblock(blocks,i*2+0);
uint64_t k2 = getblock(blocks,i*2+1);
- bmix(h1,h2,k1,k2,c1,c2);
+ k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+
+ h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+ k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+ h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
}
//----------
@@ -299,7 +278,7 @@ void MurmurHash3_x64_128 ( const void * key, const int len,
//----------
// finalization
- h2 ^= len;
+ h1 ^= len; h2 ^= len;
h1 += h2;
h2 += h1;
diff --git a/Platform.cpp b/Platform.cpp
index 3561379..dff36cb 100644
--- a/Platform.cpp
+++ b/Platform.cpp
@@ -16,6 +16,7 @@ void testRDTSC ( void )
void SetAffinity ( int cpu )
{
SetProcessAffinityMask(GetCurrentProcess(),cpu);
+ SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
}
#else
diff --git a/SMHasher.vcproj b/SMHasher.vcproj
index bb4125e..05586f7 100644
--- a/SMHasher.vcproj
+++ b/SMHasher.vcproj
@@ -323,10 +323,6 @@
Name="Hashes"
>
<File
- RelativePath=".\CityHash.cpp"
- >
- </File>
- <File
RelativePath=".\crc.cpp"
>
</File>
diff --git a/SpeedTest.cpp b/SpeedTest.cpp
index 9012ae3..b586102 100644
--- a/SpeedTest.cpp
+++ b/SpeedTest.cpp
@@ -189,7 +189,7 @@ double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int block
{
r.rand_p(block,blocksize);
- double t = timehash(hash,block,blocksize,itrial);
+ double t = (double)timehash(hash,block,blocksize,itrial);
if(t > 0) times.push_back(t);
}
@@ -210,9 +210,7 @@ double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int block
void BulkSpeedTest ( pfHash hash, uint32_t seed )
{
- Rand r(seed);
-
- const int trials = 999;
+ const int trials = 2999;
const int blocksize = 256 * 1024;
printf("Bulk speed test - %d-byte keys\n",blocksize);
diff --git a/main.cpp b/main.cpp
index 4574015..8c76468 100644
--- a/main.cpp
+++ b/main.cpp
@@ -67,8 +67,8 @@ HashInfo g_hashes[] =
// MurmurHash3
{ MurmurHash3_x86_32, 32, 0x3252D141, "Murmur3A", "MurmurHash3 for x86, 32-bit" },
- { MurmurHash3_x86_128, 128, 0x13C7ED69, "Murmur3C", "MurmurHash3 for x86, 128-bit" },
- { MurmurHash3_x64_128, 128, 0x33949085, "Murmur3F", "MurmurHash3 for x64, 128-bit" },
+ { MurmurHash3_x86_128, 128, 0xB3ECE62A, "Murmur3C", "MurmurHash3 for x86, 128-bit" },
+ { MurmurHash3_x64_128, 128, 0x833607F9, "Murmur3F", "MurmurHash3 for x64, 128-bit" },
};