summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortanjent@gmail.com <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>2011-03-31 02:41:29 +0000
committertanjent@gmail.com <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a>2011-03-31 02:41:29 +0000
commit96601f2cd5d12b4618ecd830e8a155cad18e15dc (patch)
tree1da9d04e7f7f4b15cafb36e61e91525bb9fa96b4
parent623590de821184aafc3aa6d72b9fd24791884751 (diff)
downloadsrc-96601f2cd5d12b4618ecd830e8a155cad18e15dc.tar.gz
Add TwoBytes test (not on by default)
Remove hash list duplication from collision test so we don't use so much RAM

git-svn-id: http://smhasher.googlecode.com/svn/trunk@101 77a7d1d3-4c08-bdc2-d393-d5859734b01a
-rw-r--r--KeysetTest.h80
-rw-r--r--SMHasher.vcproj4
-rw-r--r--Stats.cpp2
-rw-r--r--Stats.h7
-rw-r--r--Types.h6
-rw-r--r--main.cpp29
6 files changed, 123 insertions, 5 deletions
diff --git a/KeysetTest.h b/KeysetTest.h
index 936e535..9e41b6f 100644
--- a/KeysetTest.h
+++ b/KeysetTest.h
@@ -12,6 +12,7 @@
#include "Random.h" // for rand_p
#include <algorithm> // for std::swap
+#include <assert.h>
//-----------------------------------------------------------------------------
// Sanity tests
@@ -288,6 +289,85 @@ bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycoun
}
//-----------------------------------------------------------------------------
+// Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes
+//
+// For each key length from 2 to maxlen this hashes every key that has exactly
+// one non-zero byte, then every key that has exactly two non-zero bytes (all
+// remaining bytes are zero), and hands the collected hashes to TestHashList.
+//
+// hash        - function under test, invoked as hash(key,keylen,0,&out)
+// maxlen      - longest key length generated; must fit the 256-byte key buffer
+// drawDiagram - forwarded unchanged to TestHashList
+//
+// Returns whatever TestHashList reports for the generated hash list.
+
+template < typename hashtype >
+bool TwoBytesTest ( pfHash hash, int maxlen, bool drawDiagram )
+{
+  uint8_t key[256];
+
+  // Keys are built in the fixed-size buffer above; a larger maxlen would
+  // write past its end.
+  assert(maxlen <= (int)sizeof(key));
+
+  memset(key,0,sizeof(key));
+
+  //----------
+  // Work out how many keys will be generated so the hash list can be sized
+  // once up front.
+
+  int keycount = 0;
+
+  // Two non-zero bytes: choose(len,2) position pairs, 255*255 value pairs each
+  for(int i = 2; i <= maxlen; i++) keycount += (int)chooseK(i,2);
+
+  keycount *= 255*255;
+
+  // One non-zero byte: len positions, 255 values each
+  for(int i = 2; i <= maxlen; i++) keycount += i*255;
+
+  printf("Keyset 'TwoBytes' - %d keys of up to %d bytes\n",keycount,maxlen);
+
+  std::vector<hashtype> hashes;
+  hashes.resize(keycount);
+  int cursor = 0;
+
+  //----------
+  // Add all keys with one non-zero byte
+
+  for(int keylen = 2; keylen <= maxlen; keylen++)
+  for(int byteA = 0; byteA < keylen; byteA++)
+  {
+    for(int valA = 1; valA <= 255; valA++)
+    {
+      key[byteA] = (uint8_t)valA;
+
+      // cursor is about to index hashes, so it must be strictly in range
+      assert(cursor < keycount);
+      hash(key,keylen,0,&hashes[cursor++]);
+    }
+
+    // Restore the all-zero key before moving to the next position
+    key[byteA] = 0;
+  }
+
+  //----------
+  // Add all keys with two non-zero bytes
+
+  for(int keylen = 2; keylen <= maxlen; keylen++)
+  for(int byteA = 0; byteA < keylen-1; byteA++)
+  for(int byteB = byteA+1; byteB < keylen; byteB++)
+  {
+    for(int valA = 1; valA <= 255; valA++)
+    {
+      key[byteA] = (uint8_t)valA;
+
+      for(int valB = 1; valB <= 255; valB++)
+      {
+        key[byteB] = (uint8_t)valB;
+
+        // cursor is about to index hashes, so it must be strictly in range
+        assert(cursor < keycount);
+        hash(key,keylen,0,&hashes[cursor++]);
+      }
+
+      key[byteB] = 0;
+    }
+
+    key[byteA] = 0;
+  }
+
+  //----------
+
+  printf("Actually %d keys\n",cursor);
+
+  // Every precomputed slot must have been filled exactly once
+  assert(cursor == keycount);
+
+  bool result = true;
+
+  result &= TestHashList(hashes,true,false,drawDiagram);
+  printf("\n");
+
+  return result;
+}
+
+//-----------------------------------------------------------------------------
// Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix",
// where "core" consists of all possible combinations of the given character
// set of length N.
diff --git a/SMHasher.vcproj b/SMHasher.vcproj
index 05586f7..bb4125e 100644
--- a/SMHasher.vcproj
+++ b/SMHasher.vcproj
@@ -323,6 +323,10 @@
Name="Hashes"
>
<File
+ RelativePath=".\CityHash.cpp"
+ >
+ </File>
+ <File
RelativePath=".\crc.cpp"
>
</File>
diff --git a/Stats.cpp b/Stats.cpp
index 4b722c8..55e99fc 100644
--- a/Stats.cpp
+++ b/Stats.cpp
@@ -4,7 +4,7 @@
double chooseK ( int n, int k )
{
- if(k > (n - k)) k = n - k;
+ if(k > (n - k)) k = n - k;
double c = 1;
diff --git a/Stats.h b/Stats.h
index 3246373..5f60c61 100644
--- a/Stats.h
+++ b/Stats.h
@@ -37,16 +37,15 @@ inline uint32_t f3mix ( uint32_t k )
//-----------------------------------------------------------------------------
template< typename hashtype >
-int CountCollisions ( std::vector<hashtype> const & hashes )
+int CountCollisions ( std::vector<hashtype> & hashes )
{
int collcount = 0;
- std::vector<hashtype> temp = hashes;
- std::sort(temp.begin(),temp.end());
+ std::sort(hashes.begin(),hashes.end());
for(size_t i = 1; i < hashes.size(); i++)
{
- if(temp[i] == temp[i-1]) collcount++;
+ if(hashes[i] == hashes[i-1]) collcount++;
}
return collcount;
diff --git a/Types.h b/Types.h
index 1abb352..ddb464b 100644
--- a/Types.h
+++ b/Types.h
@@ -102,6 +102,12 @@ public:
return *this;
}
+ // Packs a and b, in that order, into a 16-byte buffer and passes it to set().
+ Blob ( uint64_t a, uint64_t b )
+ {
+   const uint64_t words[2] = { a, b };
+   set(words,(int)sizeof(words));
+ }
+
void set ( const void * blob, int len )
{
const uint8_t * k = (const uint8_t*)blob;
diff --git a/main.cpp b/main.cpp
index ab397e7..bc4996c 100644
--- a/main.cpp
+++ b/main.cpp
@@ -20,6 +20,7 @@ bool g_testDiffDist = false;
bool g_testAvalanche = false;
bool g_testBIC = false;
bool g_testCyclic = false;
+bool g_testTwoBytes = false;
bool g_testSparse = false;
bool g_testPermutation = false;
bool g_testWindow = false;
@@ -255,6 +256,22 @@ void test ( hashfunc<hashtype> hash, HashInfo * info )
}
//-----------------------------------------------------------------------------
+ // Keyset 'TwoBytes'
+
+ if(g_testTwoBytes)
+ {
+ printf("[[[ Keyset 'TwoBytes' Tests ]]]\n\n");
+
+ bool result = true;
+ bool drawDiagram = false;
+
+ result &= TwoBytesTest<hashtype>(hash,24,drawDiagram);
+
+ if(!result) printf("*********FAIL*********\n");
+ printf("\n");
+ }
+
+ //-----------------------------------------------------------------------------
// Keyset 'Sparse'
if(g_testSparse || g_testAll)
@@ -539,6 +556,7 @@ int main ( int argc, char ** argv )
//g_testAvalanche = true;
//g_testBIC = true;
//g_testCyclic = true;
+ //g_testTwoBytes = true;
//g_testDiff = true;
//g_testDiffDist = true;
//g_testSparse = true;
@@ -547,6 +565,17 @@ int main ( int argc, char ** argv )
testHash(hashToTest);
+ /*
+ for(int i = 0; i < sizeof(g_hashes)/sizeof(HashInfo); i++)
+ {
+ testHash(g_hashes[i].name);
+ }
+ */
+
+ //testHash("murmur3a");
+ //testHash("murmur3c");
+ //testHash("murmur3f");
+
//----------
int timeEnd = clock();