diff options
author | tanjent@gmail.com <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a> | 2012-03-01 03:38:55 +0000 |
---|---|---|
committer | tanjent@gmail.com <tanjent@gmail.com@77a7d1d3-4c08-bdc2-d393-d5859734b01a> | 2012-03-01 03:38:55 +0000 |
commit | f3b789787b93945c974e2cc517b7dc352b28354e (patch) | |
tree | 964ce5b7a74e21ce9056d974270ae7ba8d3389cd | |
parent | b35e562e2d80bc47a51b53ec92a305eb9a3383b4 (diff) | |
download | src-f3b789787b93945c974e2cc517b7dc352b28354e.tar.gz |
Merge branch chandlerc_dev
git-svn-id: http://smhasher.googlecode.com/svn/trunk@144 77a7d1d3-4c08-bdc2-d393-d5859734b01a
-rw-r--r-- | AvalancheTest.cpp | 112 | ||||
-rw-r--r-- | AvalancheTest.h | 844 | ||||
-rw-r--r-- | Bitslice.cpp | 252 | ||||
-rw-r--r-- | Bitvec.cpp | 1514 | ||||
-rw-r--r-- | Bitvec.h | 490 | ||||
-rw-r--r-- | CMakeLists.txt | 3 | ||||
-rw-r--r-- | City.cpp | 786 | ||||
-rw-r--r-- | City.h | 203 | ||||
-rw-r--r-- | CityTest.cpp | 15 | ||||
-rw-r--r-- | DifferentialTest.cpp | 6 | ||||
-rw-r--r-- | DifferentialTest.h | 562 | ||||
-rw-r--r-- | Hashes.cpp | 310 | ||||
-rw-r--r-- | Hashes.h | 151 | ||||
-rw-r--r-- | KeysetTest.cpp | 654 | ||||
-rw-r--r-- | KeysetTest.h | 878 | ||||
-rw-r--r-- | MurmurHash1.cpp | 348 | ||||
-rw-r--r-- | MurmurHash1.h | 68 | ||||
-rw-r--r-- | MurmurHash2.cpp | 1046 | ||||
-rw-r--r-- | MurmurHash2.h | 78 | ||||
-rw-r--r-- | MurmurHash3.cpp | 670 | ||||
-rw-r--r-- | MurmurHash3.h | 74 | ||||
-rw-r--r-- | Platform.cpp | 84 | ||||
-rw-r--r-- | Platform.h | 179 | ||||
-rw-r--r-- | Random.cpp | 16 | ||||
-rw-r--r-- | Random.h | 234 | ||||
-rw-r--r-- | SpeedTest.cpp | 484 | ||||
-rw-r--r-- | SpeedTest.h | 16 | ||||
-rw-r--r-- | Spooky.cpp | 347 | ||||
-rw-r--r-- | Spooky.h | 293 | ||||
-rw-r--r-- | SpookyTest.cpp | 16 | ||||
-rw-r--r-- | Stats.cpp | 198 | ||||
-rw-r--r-- | Stats.h | 776 | ||||
-rw-r--r-- | SuperFastHash.cpp | 152 | ||||
-rw-r--r-- | Types.cpp | 296 | ||||
-rw-r--r-- | Types.h | 748 | ||||
-rw-r--r-- | crc.cpp | 200 | ||||
-rw-r--r-- | lookup3.cpp | 144 | ||||
-rw-r--r-- | main.cpp | 1185 | ||||
-rw-r--r-- | md5.cpp | 762 | ||||
-rw-r--r-- | pstdint.h | 1598 | ||||
-rw-r--r-- | sha1.cpp | 650 | ||||
-rw-r--r-- | sha1.h | 40 |
42 files changed, 9164 insertions, 8318 deletions
diff --git a/AvalancheTest.cpp b/AvalancheTest.cpp index 38aa452..f5ea0df 100644 --- a/AvalancheTest.cpp +++ b/AvalancheTest.cpp @@ -1,56 +1,56 @@ -#include "AvalancheTest.h"
-
-//-----------------------------------------------------------------------------
-
-void PrintAvalancheDiagram ( int x, int y, int reps, double scale, int * bins )
-{
- const char * symbols = ".123456789X";
-
- for(int i = 0; i < y; i++)
- {
- printf("[");
- for(int j = 0; j < x; j++)
- {
- int k = (y - i) -1;
-
- int bin = bins[k + (j*y)];
-
- double b = double(bin) / double(reps);
- b = fabs(b*2 - 1);
-
- b *= scale;
-
- int s = (int)floor(b*10);
-
- if(s > 10) s = 10;
- if(s < 0) s = 0;
-
- printf("%c",symbols[s]);
- }
-
- printf("]\n");
- }
-}
-
-//----------------------------------------------------------------------------
-
-double maxBias ( std::vector<int> & counts, int reps )
-{
- double worst = 0;
-
- for(int i = 0; i < (int)counts.size(); i++)
- {
- double c = double(counts[i]) / double(reps);
-
- double d = fabs(c * 2 - 1);
-
- if(d > worst)
- {
- worst = d;
- }
- }
-
- return worst;
-}
-
-//-----------------------------------------------------------------------------
+#include "AvalancheTest.h" + +//----------------------------------------------------------------------------- + +void PrintAvalancheDiagram ( int x, int y, int reps, double scale, int * bins ) +{ + const char * symbols = ".123456789X"; + + for(int i = 0; i < y; i++) + { + printf("["); + for(int j = 0; j < x; j++) + { + int k = (y - i) -1; + + int bin = bins[k + (j*y)]; + + double b = double(bin) / double(reps); + b = fabs(b*2 - 1); + + b *= scale; + + int s = (int)floor(b*10); + + if(s > 10) s = 10; + if(s < 0) s = 0; + + printf("%c",symbols[s]); + } + + printf("]\n"); + } +} + +//---------------------------------------------------------------------------- + +double maxBias ( std::vector<int> & counts, int reps ) +{ + double worst = 0; + + for(int i = 0; i < (int)counts.size(); i++) + { + double c = double(counts[i]) / double(reps); + + double d = fabs(c * 2 - 1); + + if(d > worst) + { + worst = d; + } + } + + return worst; +} + +//----------------------------------------------------------------------------- diff --git a/AvalancheTest.h b/AvalancheTest.h index 4c23369..f1bfeea 100644 --- a/AvalancheTest.h +++ b/AvalancheTest.h @@ -1,422 +1,422 @@ -//-----------------------------------------------------------------------------
-// Flipping a single bit of a key should cause an "avalanche" of changes in
-// the hash function's output. Ideally, each output bits should flip 50% of
-// the time - if the probability of an output bit flipping is not 50%, that bit
-// is "biased". Too much bias means that patterns applied to the input will
-// cause "echoes" of the patterns in the output, which in turn can cause the
-// hash function to fail to create an even, random distribution of hash values.
-
-
-#pragma once
-
-#include "Types.h"
-#include "Random.h"
-
-#include <vector>
-#include <stdio.h>
-#include <math.h>
-
-// Avalanche fails if a bit is biased by more than 1%
-
-#define AVALANCHE_FAIL 0.01
-
-double maxBias ( std::vector<int> & counts, int reps );
-
-//-----------------------------------------------------------------------------
-
-template < typename keytype, typename hashtype >
-void calcBias ( pfHash hash, std::vector<int> & counts, int reps, Rand & r )
-{
- const int keybytes = sizeof(keytype);
- const int hashbytes = sizeof(hashtype);
-
- const int keybits = keybytes * 8;
- const int hashbits = hashbytes * 8;
-
- keytype K;
- hashtype A,B;
-
- for(int irep = 0; irep < reps; irep++)
- {
- if(irep % (reps/10) == 0) printf(".");
-
- r.rand_p(&K,keybytes);
-
- hash(&K,keybytes,0,&A);
-
- int * cursor = &counts[0];
-
- for(int iBit = 0; iBit < keybits; iBit++)
- {
- flipbit(&K,keybytes,iBit);
- hash(&K,keybytes,0,&B);
- flipbit(&K,keybytes,iBit);
-
- for(int iOut = 0; iOut < hashbits; iOut++)
- {
- int bitA = getbit(&A,hashbytes,iOut);
- int bitB = getbit(&B,hashbytes,iOut);
-
- (*cursor++) += (bitA ^ bitB);
- }
- }
- }
-}
-
-//-----------------------------------------------------------------------------
-
-template < typename keytype, typename hashtype >
-bool AvalancheTest ( pfHash hash, const int reps )
-{
- Rand r(48273);
-
- const int keybytes = sizeof(keytype);
- const int hashbytes = sizeof(hashtype);
-
- const int keybits = keybytes * 8;
- const int hashbits = hashbytes * 8;
-
- printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps);
-
- //----------
-
- std::vector<int> bins(keybits*hashbits,0);
-
- calcBias<keytype,hashtype>(hash,bins,reps,r);
-
- //----------
-
- bool result = true;
-
- double b = maxBias(bins,reps);
-
- printf(" worst bias is %f%%",b * 100.0);
-
- if(b > AVALANCHE_FAIL)
- {
- printf(" !!!!! ");
- result = false;
- }
-
- printf("\n");
-
- return result;
-}
-
-//----------------------------------------------------------------------------
-// Tests the Bit Independence Criteron. Stricter than Avalanche, but slow and
-// not really all that useful.
-
-template< typename keytype, typename hashtype >
-void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose )
-{
- Rand r(11938);
-
- const int keybytes = sizeof(keytype);
- const int hashbytes = sizeof(hashtype);
- const int hashbits = hashbytes * 8;
-
- std::vector<int> bins(hashbits*hashbits*4,0);
-
- keytype key;
- hashtype h1,h2;
-
- for(int irep = 0; irep < reps; irep++)
- {
- if(verbose)
- {
- if(irep % (reps/10) == 0) printf(".");
- }
-
- r.rand_p(&key,keybytes);
- hash(&key,keybytes,0,&h1);
-
- flipbit(key,keybit);
- hash(&key,keybytes,0,&h2);
-
- hashtype d = h1 ^ h2;
-
- for(int out1 = 0; out1 < hashbits; out1++)
- for(int out2 = 0; out2 < hashbits; out2++)
- {
- if(out1 == out2) continue;
-
- uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
-
- bins[(out1 * hashbits + out2) * 4 + b]++;
- }
- }
-
- if(verbose) printf("\n");
-
- maxBias = 0;
-
- for(int out1 = 0; out1 < hashbits; out1++)
- {
- for(int out2 = 0; out2 < hashbits; out2++)
- {
- if(out1 == out2)
- {
- if(verbose) printf("\\");
- continue;
- }
-
- double bias = 0;
-
- for(int b = 0; b < 4; b++)
- {
- double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / double(reps / 2);
- b2 = fabs(b2 * 2 - 1);
-
- if(b2 > bias) bias = b2;
- }
-
- if(bias > maxBias)
- {
- maxBias = bias;
- maxA = out1;
- maxB = out2;
- }
-
- if(verbose)
- {
- if (bias < 0.01) printf(".");
- else if(bias < 0.05) printf("o");
- else if(bias < 0.33) printf("O");
- else printf("X");
- }
- }
-
- if(verbose) printf("\n");
- }
-}
-
-//----------
-
-template< typename keytype, typename hashtype >
-bool BicTest ( pfHash hash, const int reps )
-{
- const int keybytes = sizeof(keytype);
- const int keybits = keybytes * 8;
-
- double maxBias = 0;
- int maxK = 0;
- int maxA = 0;
- int maxB = 0;
-
- for(int i = 0; i < keybits; i++)
- {
- if(i % (keybits/10) == 0) printf(".");
-
- double bias;
- int a,b;
-
- BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,true);
-
- if(bias > maxBias)
- {
- maxBias = bias;
- maxK = i;
- maxA = a;
- maxB = b;
- }
- }
-
- printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
-
- // Bit independence is harder to pass than avalanche, so we're a bit more lax here.
-
- bool result = (maxBias < 0.05);
-
- return result;
-}
-
-//-----------------------------------------------------------------------------
-// BIC test variant - store all intermediate data in a table, draw diagram
-// afterwards (much faster)
-
-template< typename keytype, typename hashtype >
-void BicTest3 ( pfHash hash, const int reps, bool verbose = true )
-{
- const int keybytes = sizeof(keytype);
- const int keybits = keybytes * 8;
- const int hashbytes = sizeof(hashtype);
- const int hashbits = hashbytes * 8;
- const int pagesize = hashbits*hashbits*4;
-
- Rand r(11938);
-
- double maxBias = 0;
- int maxK = 0;
- int maxA = 0;
- int maxB = 0;
-
- keytype key;
- hashtype h1,h2;
-
- std::vector<int> bins(keybits*pagesize,0);
-
- for(int keybit = 0; keybit < keybits; keybit++)
- {
- if(keybit % (keybits/10) == 0) printf(".");
-
- int * page = &bins[keybit*pagesize];
-
- for(int irep = 0; irep < reps; irep++)
- {
- r.rand_p(&key,keybytes);
- hash(&key,keybytes,0,&h1);
- flipbit(key,keybit);
- hash(&key,keybytes,0,&h2);
-
- hashtype d = h1 ^ h2;
-
- for(int out1 = 0; out1 < hashbits-1; out1++)
- for(int out2 = out1+1; out2 < hashbits; out2++)
- {
- int * b = &page[(out1*hashbits+out2)*4];
-
- uint32_t x = getbit(d,out1) | (getbit(d,out2) << 1);
-
- b[x]++;
- }
- }
- }
-
- printf("\n");
-
- for(int out1 = 0; out1 < hashbits-1; out1++)
- {
- for(int out2 = out1+1; out2 < hashbits; out2++)
- {
- if(verbose) printf("(%3d,%3d) - ",out1,out2);
-
- for(int keybit = 0; keybit < keybits; keybit++)
- {
- int * page = &bins[keybit*pagesize];
- int * bins = &page[(out1*hashbits+out2)*4];
-
- double bias = 0;
-
- for(int b = 0; b < 4; b++)
- {
- double b2 = double(bins[b]) / double(reps / 2);
- b2 = fabs(b2 * 2 - 1);
-
- if(b2 > bias) bias = b2;
- }
-
- if(bias > maxBias)
- {
- maxBias = bias;
- maxK = keybit;
- maxA = out1;
- maxB = out2;
- }
-
- if(verbose)
- {
- if (bias < 0.01) printf(".");
- else if(bias < 0.05) printf("o");
- else if(bias < 0.33) printf("O");
- else printf("X");
- }
- }
-
- // Finished keybit
-
- if(verbose) printf("\n");
- }
-
- if(verbose)
- {
- for(int i = 0; i < keybits+12; i++) printf("-");
- printf("\n");
- }
- }
-
- printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
-}
-
-
-//-----------------------------------------------------------------------------
-// BIC test variant - iterate over output bits, then key bits. No temp storage,
-// but slooooow
-
-template< typename keytype, typename hashtype >
-void BicTest2 ( pfHash hash, const int reps, bool verbose = true )
-{
- const int keybytes = sizeof(keytype);
- const int keybits = keybytes * 8;
- const int hashbytes = sizeof(hashtype);
- const int hashbits = hashbytes * 8;
-
- Rand r(11938);
-
- double maxBias = 0;
- int maxK = 0;
- int maxA = 0;
- int maxB = 0;
-
- keytype key;
- hashtype h1,h2;
-
- for(int out1 = 0; out1 < hashbits-1; out1++)
- for(int out2 = out1+1; out2 < hashbits; out2++)
- {
- if(verbose) printf("(%3d,%3d) - ",out1,out2);
-
- for(int keybit = 0; keybit < keybits; keybit++)
- {
- int bins[4] = { 0, 0, 0, 0 };
-
- for(int irep = 0; irep < reps; irep++)
- {
- r.rand_p(&key,keybytes);
- hash(&key,keybytes,0,&h1);
- flipbit(key,keybit);
- hash(&key,keybytes,0,&h2);
-
- hashtype d = h1 ^ h2;
-
- uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
-
- bins[b]++;
- }
-
- double bias = 0;
-
- for(int b = 0; b < 4; b++)
- {
- double b2 = double(bins[b]) / double(reps / 2);
- b2 = fabs(b2 * 2 - 1);
-
- if(b2 > bias) bias = b2;
- }
-
- if(bias > maxBias)
- {
- maxBias = bias;
- maxK = keybit;
- maxA = out1;
- maxB = out2;
- }
-
- if(verbose)
- {
- if (bias < 0.05) printf(".");
- else if(bias < 0.10) printf("o");
- else if(bias < 0.50) printf("O");
- else printf("X");
- }
- }
-
- // Finished keybit
-
- if(verbose) printf("\n");
- }
-
- printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
-}
-
-//-----------------------------------------------------------------------------
+//----------------------------------------------------------------------------- +// Flipping a single bit of a key should cause an "avalanche" of changes in +// the hash function's output. Ideally, each output bits should flip 50% of +// the time - if the probability of an output bit flipping is not 50%, that bit +// is "biased". Too much bias means that patterns applied to the input will +// cause "echoes" of the patterns in the output, which in turn can cause the +// hash function to fail to create an even, random distribution of hash values. + + +#pragma once + +#include "Types.h" +#include "Random.h" + +#include <vector> +#include <stdio.h> +#include <math.h> + +// Avalanche fails if a bit is biased by more than 1% + +#define AVALANCHE_FAIL 0.01 + +double maxBias ( std::vector<int> & counts, int reps ); + +//----------------------------------------------------------------------------- + +template < typename keytype, typename hashtype > +void calcBias ( pfHash hash, std::vector<int> & counts, int reps, Rand & r ) +{ + const int keybytes = sizeof(keytype); + const int hashbytes = sizeof(hashtype); + + const int keybits = keybytes * 8; + const int hashbits = hashbytes * 8; + + keytype K; + hashtype A,B; + + for(int irep = 0; irep < reps; irep++) + { + if(irep % (reps/10) == 0) printf("."); + + r.rand_p(&K,keybytes); + + hash(&K,keybytes,0,&A); + + int * cursor = &counts[0]; + + for(int iBit = 0; iBit < keybits; iBit++) + { + flipbit(&K,keybytes,iBit); + hash(&K,keybytes,0,&B); + flipbit(&K,keybytes,iBit); + + for(int iOut = 0; iOut < hashbits; iOut++) + { + int bitA = getbit(&A,hashbytes,iOut); + int bitB = getbit(&B,hashbytes,iOut); + + (*cursor++) += (bitA ^ bitB); + } + } + } +} + +//----------------------------------------------------------------------------- + +template < typename keytype, typename hashtype > +bool AvalancheTest ( pfHash hash, const int reps ) +{ + Rand r(48273); + + const int keybytes = sizeof(keytype); + const int hashbytes = 
sizeof(hashtype); + + const int keybits = keybytes * 8; + const int hashbits = hashbytes * 8; + + printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps); + + //---------- + + std::vector<int> bins(keybits*hashbits,0); + + calcBias<keytype,hashtype>(hash,bins,reps,r); + + //---------- + + bool result = true; + + double b = maxBias(bins,reps); + + printf(" worst bias is %f%%",b * 100.0); + + if(b > AVALANCHE_FAIL) + { + printf(" !!!!! "); + result = false; + } + + printf("\n"); + + return result; +} + +//---------------------------------------------------------------------------- +// Tests the Bit Independence Criteron. Stricter than Avalanche, but slow and +// not really all that useful. + +template< typename keytype, typename hashtype > +void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose ) +{ + Rand r(11938); + + const int keybytes = sizeof(keytype); + const int hashbytes = sizeof(hashtype); + const int hashbits = hashbytes * 8; + + std::vector<int> bins(hashbits*hashbits*4,0); + + keytype key; + hashtype h1,h2; + + for(int irep = 0; irep < reps; irep++) + { + if(verbose) + { + if(irep % (reps/10) == 0) printf("."); + } + + r.rand_p(&key,keybytes); + hash(&key,keybytes,0,&h1); + + flipbit(key,keybit); + hash(&key,keybytes,0,&h2); + + hashtype d = h1 ^ h2; + + for(int out1 = 0; out1 < hashbits; out1++) + for(int out2 = 0; out2 < hashbits; out2++) + { + if(out1 == out2) continue; + + uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1); + + bins[(out1 * hashbits + out2) * 4 + b]++; + } + } + + if(verbose) printf("\n"); + + maxBias = 0; + + for(int out1 = 0; out1 < hashbits; out1++) + { + for(int out2 = 0; out2 < hashbits; out2++) + { + if(out1 == out2) + { + if(verbose) printf("\\"); + continue; + } + + double bias = 0; + + for(int b = 0; b < 4; b++) + { + double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / double(reps / 2); + b2 = fabs(b2 * 2 - 1); + + if(b2 > bias) 
bias = b2; + } + + if(bias > maxBias) + { + maxBias = bias; + maxA = out1; + maxB = out2; + } + + if(verbose) + { + if (bias < 0.01) printf("."); + else if(bias < 0.05) printf("o"); + else if(bias < 0.33) printf("O"); + else printf("X"); + } + } + + if(verbose) printf("\n"); + } +} + +//---------- + +template< typename keytype, typename hashtype > +bool BicTest ( pfHash hash, const int reps ) +{ + const int keybytes = sizeof(keytype); + const int keybits = keybytes * 8; + + double maxBias = 0; + int maxK = 0; + int maxA = 0; + int maxB = 0; + + for(int i = 0; i < keybits; i++) + { + if(i % (keybits/10) == 0) printf("."); + + double bias; + int a,b; + + BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,true); + + if(bias > maxBias) + { + maxBias = bias; + maxK = i; + maxA = a; + maxB = b; + } + } + + printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB); + + // Bit independence is harder to pass than avalanche, so we're a bit more lax here. + + bool result = (maxBias < 0.05); + + return result; +} + +//----------------------------------------------------------------------------- +// BIC test variant - store all intermediate data in a table, draw diagram +// afterwards (much faster) + +template< typename keytype, typename hashtype > +void BicTest3 ( pfHash hash, const int reps, bool verbose = true ) +{ + const int keybytes = sizeof(keytype); + const int keybits = keybytes * 8; + const int hashbytes = sizeof(hashtype); + const int hashbits = hashbytes * 8; + const int pagesize = hashbits*hashbits*4; + + Rand r(11938); + + double maxBias = 0; + int maxK = 0; + int maxA = 0; + int maxB = 0; + + keytype key; + hashtype h1,h2; + + std::vector<int> bins(keybits*pagesize,0); + + for(int keybit = 0; keybit < keybits; keybit++) + { + if(keybit % (keybits/10) == 0) printf("."); + + int * page = &bins[keybit*pagesize]; + + for(int irep = 0; irep < reps; irep++) + { + r.rand_p(&key,keybytes); + hash(&key,keybytes,0,&h1); + flipbit(key,keybit); + 
hash(&key,keybytes,0,&h2); + + hashtype d = h1 ^ h2; + + for(int out1 = 0; out1 < hashbits-1; out1++) + for(int out2 = out1+1; out2 < hashbits; out2++) + { + int * b = &page[(out1*hashbits+out2)*4]; + + uint32_t x = getbit(d,out1) | (getbit(d,out2) << 1); + + b[x]++; + } + } + } + + printf("\n"); + + for(int out1 = 0; out1 < hashbits-1; out1++) + { + for(int out2 = out1+1; out2 < hashbits; out2++) + { + if(verbose) printf("(%3d,%3d) - ",out1,out2); + + for(int keybit = 0; keybit < keybits; keybit++) + { + int * page = &bins[keybit*pagesize]; + int * bins = &page[(out1*hashbits+out2)*4]; + + double bias = 0; + + for(int b = 0; b < 4; b++) + { + double b2 = double(bins[b]) / double(reps / 2); + b2 = fabs(b2 * 2 - 1); + + if(b2 > bias) bias = b2; + } + + if(bias > maxBias) + { + maxBias = bias; + maxK = keybit; + maxA = out1; + maxB = out2; + } + + if(verbose) + { + if (bias < 0.01) printf("."); + else if(bias < 0.05) printf("o"); + else if(bias < 0.33) printf("O"); + else printf("X"); + } + } + + // Finished keybit + + if(verbose) printf("\n"); + } + + if(verbose) + { + for(int i = 0; i < keybits+12; i++) printf("-"); + printf("\n"); + } + } + + printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB); +} + + +//----------------------------------------------------------------------------- +// BIC test variant - iterate over output bits, then key bits. 
No temp storage, +// but slooooow + +template< typename keytype, typename hashtype > +void BicTest2 ( pfHash hash, const int reps, bool verbose = true ) +{ + const int keybytes = sizeof(keytype); + const int keybits = keybytes * 8; + const int hashbytes = sizeof(hashtype); + const int hashbits = hashbytes * 8; + + Rand r(11938); + + double maxBias = 0; + int maxK = 0; + int maxA = 0; + int maxB = 0; + + keytype key; + hashtype h1,h2; + + for(int out1 = 0; out1 < hashbits-1; out1++) + for(int out2 = out1+1; out2 < hashbits; out2++) + { + if(verbose) printf("(%3d,%3d) - ",out1,out2); + + for(int keybit = 0; keybit < keybits; keybit++) + { + int bins[4] = { 0, 0, 0, 0 }; + + for(int irep = 0; irep < reps; irep++) + { + r.rand_p(&key,keybytes); + hash(&key,keybytes,0,&h1); + flipbit(key,keybit); + hash(&key,keybytes,0,&h2); + + hashtype d = h1 ^ h2; + + uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1); + + bins[b]++; + } + + double bias = 0; + + for(int b = 0; b < 4; b++) + { + double b2 = double(bins[b]) / double(reps / 2); + b2 = fabs(b2 * 2 - 1); + + if(b2 > bias) bias = b2; + } + + if(bias > maxBias) + { + maxBias = bias; + maxK = keybit; + maxA = out1; + maxB = out2; + } + + if(verbose) + { + if (bias < 0.05) printf("."); + else if(bias < 0.10) printf("o"); + else if(bias < 0.50) printf("O"); + else printf("X"); + } + } + + // Finished keybit + + if(verbose) printf("\n"); + } + + printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB); +} + +//----------------------------------------------------------------------------- diff --git a/Bitslice.cpp b/Bitslice.cpp index 428e355..45a2249 100644 --- a/Bitslice.cpp +++ b/Bitslice.cpp @@ -1,127 +1,127 @@ -#include "Bitvec.h"
-#include <vector>
-#include <assert.h>
-
-// handle xnor
-
-typedef std::vector<uint32_t> slice;
-typedef std::vector<slice> slice_vec;
-
-int countbits ( slice & v )
-{
- int c = 0;
-
- for(size_t i = 0; i < v.size(); i++)
- {
- int d = countbits(v[i]);
-
- c += d;
- }
-
- return c;
-}
-
-int countxor ( slice & a, slice & b )
-{
- assert(a.size() == b.size());
-
- int c = 0;
-
- for(size_t i = 0; i < a.size(); i++)
- {
- int d = countbits(a[i] ^ b[i]);
-
- c += d;
- }
-
- return c;
-}
-
-void xoreq ( slice & a, slice & b )
-{
- assert(a.size() == b.size());
-
- for(size_t i = 0; i < a.size(); i++)
- {
- a[i] ^= b[i];
- }
-}
-
-//-----------------------------------------------------------------------------
-// Bitslice a hash set
-
-template< typename hashtype >
-void Bitslice ( std::vector<hashtype> & hashes, slice_vec & slices )
-{
- const int hashbytes = sizeof(hashtype);
- const int hashbits = hashbytes * 8;
- const int slicelen = ((int)hashes.size() + 31) / 32;
-
- slices.clear();
- slices.resize(hashbits);
-
- for(int i = 0; i < (int)slices.size(); i++)
- {
- slices[i].resize(slicelen,0);
- }
-
- for(int j = 0; j < hashbits; j++)
- {
- void * sliceblob = &(slices[j][0]);
-
- for(int i = 0; i < (int)hashes.size(); i++)
- {
- int b = getbit(hashes[i],j);
-
- setbit(sliceblob,slicelen*4,i,b);
- }
- }
-}
-
-void FactorSlices ( slice_vec & slices )
-{
- std::vector<int> counts(slices.size(),0);
-
- for(size_t i = 0; i < slices.size(); i++)
- {
- counts[i] = countbits(slices[i]);
- }
-
- bool changed = true;
-
- while(changed)
- {
- int bestA = -1;
- int bestB = -1;
-
- for(int j = 0; j < (int)slices.size()-1; j++)
- {
- for(int i = j+1; i < (int)slices.size(); i++)
- {
- int d = countxor(slices[i],slices[j]);
-
- if((d < counts[i]) && (d < counts[j]))
- {
- if(counts[i] < counts[j])
- {
- bestA = j;
- bestB = i;
- }
- }
- else if(d < counts[i])
- {
- //bestA =
- }
- }
- }
- }
-}
-
-
-void foo ( void )
-{
- slice a;
- slice_vec b;
-
- Bitslice(a,b);
+#include "Bitvec.h" +#include <vector> +#include <assert.h> + +// handle xnor + +typedef std::vector<uint32_t> slice; +typedef std::vector<slice> slice_vec; + +int countbits ( slice & v ) +{ + int c = 0; + + for(size_t i = 0; i < v.size(); i++) + { + int d = countbits(v[i]); + + c += d; + } + + return c; +} + +int countxor ( slice & a, slice & b ) +{ + assert(a.size() == b.size()); + + int c = 0; + + for(size_t i = 0; i < a.size(); i++) + { + int d = countbits(a[i] ^ b[i]); + + c += d; + } + + return c; +} + +void xoreq ( slice & a, slice & b ) +{ + assert(a.size() == b.size()); + + for(size_t i = 0; i < a.size(); i++) + { + a[i] ^= b[i]; + } +} + +//----------------------------------------------------------------------------- +// Bitslice a hash set + +template< typename hashtype > +void Bitslice ( std::vector<hashtype> & hashes, slice_vec & slices ) +{ + const int hashbytes = sizeof(hashtype); + const int hashbits = hashbytes * 8; + const int slicelen = ((int)hashes.size() + 31) / 32; + + slices.clear(); + slices.resize(hashbits); + + for(int i = 0; i < (int)slices.size(); i++) + { + slices[i].resize(slicelen,0); + } + + for(int j = 0; j < hashbits; j++) + { + void * sliceblob = &(slices[j][0]); + + for(int i = 0; i < (int)hashes.size(); i++) + { + int b = getbit(hashes[i],j); + + setbit(sliceblob,slicelen*4,i,b); + } + } +} + +void FactorSlices ( slice_vec & slices ) +{ + std::vector<int> counts(slices.size(),0); + + for(size_t i = 0; i < slices.size(); i++) + { + counts[i] = countbits(slices[i]); + } + + bool changed = true; + + while(changed) + { + int bestA = -1; + int bestB = -1; + + for(int j = 0; j < (int)slices.size()-1; j++) + { + for(int i = j+1; i < (int)slices.size(); i++) + { + int d = countxor(slices[i],slices[j]); + + if((d < counts[i]) && (d < counts[j])) + { + if(counts[i] < counts[j]) + { + bestA = j; + bestB = i; + } + } + else if(d < counts[i]) + { + //bestA = + } + } + } + } +} + + +void foo ( void ) +{ + slice a; + slice_vec b; + + 
Bitslice(a,b); }
\ No newline at end of file @@ -1,757 +1,757 @@ -#include "Bitvec.h"
-
-#include "Random.h"
-
-#include <assert.h>
-#include <stdio.h>
-
-#ifndef DEBUG
-#undef assert
-void assert ( bool )
-{
-}
-#endif
-
-//----------------------------------------------------------------------------
-
-void printbits ( const void * blob, int len )
-{
- const uint8_t * data = (const uint8_t *)blob;
-
- printf("[");
- for(int i = 0; i < len; i++)
- {
- unsigned char byte = data[i];
-
- int hi = (byte >> 4);
- int lo = (byte & 0xF);
-
- if(hi) printf("%01x",hi);
- else printf(".");
-
- if(lo) printf("%01x",lo);
- else printf(".");
-
- if(i != len-1) printf(" ");
- }
- printf("]");
-}
-
-void printbits2 ( const uint8_t * k, int nbytes )
-{
- printf("[");
-
- for(int i = nbytes-1; i >= 0; i--)
- {
- uint8_t b = k[i];
-
- for(int j = 7; j >= 0; j--)
- {
- uint8_t c = (b & (1 << j)) ? '#' : ' ';
-
- putc(c,stdout);
- }
- }
- printf("]");
-}
-
-void printhex32 ( const void * blob, int len )
-{
- assert((len & 3) == 0);
-
- uint32_t * d = (uint32_t*)blob;
-
- printf("{ ");
-
- for(int i = 0; i < len/4; i++)
- {
- printf("0x%08x, ",d[i]);
- }
-
- printf("}");
-}
-
-void printbytes ( const void * blob, int len )
-{
- uint8_t * d = (uint8_t*)blob;
-
- printf("{ ");
-
- for(int i = 0; i < len; i++)
- {
- printf("0x%02x, ",d[i]);
- }
-
- printf(" };");
-}
-
-void printbytes2 ( const void * blob, int len )
-{
- uint8_t * d = (uint8_t*)blob;
-
- for(int i = 0; i < len; i++)
- {
- printf("%02x ",d[i]);
- }
-}
-
-//-----------------------------------------------------------------------------
-// Bit-level manipulation
-
-// These two are from the "Bit Twiddling Hacks" webpage
-
-uint32_t popcount ( uint32_t v )
-{
- v = v - ((v >> 1) & 0x55555555); // reuse input as temporary
- v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // temp
- uint32_t c = ((v + ((v >> 4) & 0xF0F0F0F)) * 0x1010101) >> 24; // count
-
- return c;
-}
-
-uint32_t parity ( uint32_t v )
-{
- v ^= v >> 1;
- v ^= v >> 2;
- v = (v & 0x11111111U) * 0x11111111U;
- return (v >> 28) & 1;
-}
-
-//-----------------------------------------------------------------------------
-
-uint32_t getbit ( const void * block, int len, uint32_t bit )
-{
- uint8_t * b = (uint8_t*)block;
-
- int byte = bit >> 3;
- bit = bit & 0x7;
-
- if(byte < len) return (b[byte] >> bit) & 1;
-
- return 0;
-}
-
-uint32_t getbit_wrap ( const void * block, int len, uint32_t bit )
-{
- uint8_t * b = (uint8_t*)block;
-
- int byte = bit >> 3;
- bit = bit & 0x7;
-
- byte %= len;
-
- return (b[byte] >> bit) & 1;
-}
-
-void setbit ( void * block, int len, uint32_t bit )
-{
- uint8_t * b = (uint8_t*)block;
-
- int byte = bit >> 3;
- bit = bit & 0x7;
-
- if(byte < len) b[byte] |= (1 << bit);
-}
-
-void setbit ( void * block, int len, uint32_t bit, uint32_t val )
-{
- val ? setbit(block,len,bit) : clearbit(block,len,bit);
-}
-
-void clearbit ( void * block, int len, uint32_t bit )
-{
- uint8_t * b = (uint8_t*)block;
-
- int byte = bit >> 3;
- bit = bit & 0x7;
-
- if(byte < len) b[byte] &= ~(1 << bit);
-}
-
-void flipbit ( void * block, int len, uint32_t bit )
-{
- uint8_t * b = (uint8_t*)block;
-
- int byte = bit >> 3;
- bit = bit & 0x7;
-
- if(byte < len) b[byte] ^= (1 << bit);
-}
-
-// from the "Bit Twiddling Hacks" webpage
-
-int countbits ( uint32_t v )
-{
- v = v - ((v >> 1) & 0x55555555); // reuse input as temporary
- v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // temp
- int c = ((v + ((v >> 4) & 0xF0F0F0F)) * 0x1010101) >> 24; // count
-
- return c;
-}
-
-//-----------------------------------------------------------------------------
-
-void lshift1 ( void * blob, int len, int c )
-{
- int nbits = len*8;
-
- for(int i = nbits-1; i >= 0; i--)
- {
- setbit(blob,len,i,getbit(blob,len,i-c));
- }
-}
-
-
-void lshift8 ( void * blob, int nbytes, int c )
-{
- uint8_t * k = (uint8_t*)blob;
-
- if(c == 0) return;
-
- int b = c >> 3;
- c &= 7;
-
- for(int i = nbytes-1; i >= b; i--)
- {
- k[i] = k[i-b];
- }
-
- for(int i = b-1; i >= 0; i--)
- {
- k[i] = 0;
- }
-
- if(c == 0) return;
-
- for(int i = nbytes-1; i >= 0; i--)
- {
- uint8_t a = k[i];
- uint8_t b = (i == 0) ? 0 : k[i-1];
-
- k[i] = (a << c) | (b >> (8-c));
- }
-}
-
-void lshift32 ( void * blob, int len, int c )
-{
- assert((len & 3) == 0);
-
- int nbytes = len;
- int ndwords = nbytes / 4;
-
- uint32_t * k = reinterpret_cast<uint32_t*>(blob);
-
- if(c == 0) return;
-
- //----------
-
- int b = c / 32;
- c &= (32-1);
-
- for(int i = ndwords-1; i >= b; i--)
- {
- k[i] = k[i-b];
- }
-
- for(int i = b-1; i >= 0; i--)
- {
- k[i] = 0;
- }
-
- if(c == 0) return;
-
- for(int i = ndwords-1; i >= 0; i--)
- {
- uint32_t a = k[i];
- uint32_t b = (i == 0) ? 0 : k[i-1];
-
- k[i] = (a << c) | (b >> (32-c));
- }
-}
-
-//-----------------------------------------------------------------------------
-
-void rshift1 ( void * blob, int len, int c )
-{
- int nbits = len*8;
-
- for(int i = 0; i < nbits; i++)
- {
- setbit(blob,len,i,getbit(blob,len,i+c));
- }
-}
-
-void rshift8 ( void * blob, int nbytes, int c )
-{
- uint8_t * k = (uint8_t*)blob;
-
- if(c == 0) return;
-
- int b = c >> 3;
- c &= 7;
-
- for(int i = 0; i < nbytes-b; i++)
- {
- k[i] = k[i+b];
- }
-
- for(int i = nbytes-b; i < nbytes; i++)
- {
- k[i] = 0;
- }
-
- if(c == 0) return;
-
- for(int i = 0; i < nbytes; i++)
- {
- uint8_t a = (i == nbytes-1) ? 0 : k[i+1];
- uint8_t b = k[i];
-
- k[i] = (a << (8-c) ) | (b >> c);
- }
-}
-
-void rshift32 ( void * blob, int len, int c )
-{
- assert((len & 3) == 0);
-
- int nbytes = len;
- int ndwords = nbytes / 4;
-
- uint32_t * k = (uint32_t*)blob;
-
- //----------
-
- if(c == 0) return;
-
- int b = c / 32;
- c &= (32-1);
-
- for(int i = 0; i < ndwords-b; i++)
- {
- k[i] = k[i+b];
- }
-
- for(int i = ndwords-b; i < ndwords; i++)
- {
- k[i] = 0;
- }
-
- if(c == 0) return;
-
- for(int i = 0; i < ndwords; i++)
- {
- uint32_t a = (i == ndwords-1) ? 0 : k[i+1];
- uint32_t b = k[i];
-
- k[i] = (a << (32-c) ) | (b >> c);
- }
-}
-
-//-----------------------------------------------------------------------------
-
-void lrot1 ( void * blob, int len, int c )
-{
- int nbits = len * 8;
-
- for(int i = 0; i < c; i++)
- {
- uint32_t bit = getbit(blob,len,nbits-1);
-
- lshift1(blob,len,1);
-
- setbit(blob,len,0,bit);
- }
-}
-
-void lrot8 ( void * blob, int len, int c )
-{
- int nbytes = len;
-
- uint8_t * k = (uint8_t*)blob;
-
- if(c == 0) return;
-
- //----------
-
- int b = c / 8;
- c &= (8-1);
-
- for(int j = 0; j < b; j++)
- {
- uint8_t t = k[nbytes-1];
-
- for(int i = nbytes-1; i > 0; i--)
- {
- k[i] = k[i-1];
- }
-
- k[0] = t;
- }
-
- uint8_t t = k[nbytes-1];
-
- if(c == 0) return;
-
- for(int i = nbytes-1; i >= 0; i--)
- {
- uint8_t a = k[i];
- uint8_t b = (i == 0) ? t : k[i-1];
-
- k[i] = (a << c) | (b >> (8-c));
- }
-}
-
-void lrot32 ( void * blob, int len, int c )
-{
- assert((len & 3) == 0);
-
- int nbytes = len;
- int ndwords = nbytes/4;
-
- uint32_t * k = (uint32_t*)blob;
-
- if(c == 0) return;
-
- //----------
-
- int b = c / 32;
- c &= (32-1);
-
- for(int j = 0; j < b; j++)
- {
- uint32_t t = k[ndwords-1];
-
- for(int i = ndwords-1; i > 0; i--)
- {
- k[i] = k[i-1];
- }
-
- k[0] = t;
- }
-
- uint32_t t = k[ndwords-1];
-
- if(c == 0) return;
-
- for(int i = ndwords-1; i >= 0; i--)
- {
- uint32_t a = k[i];
- uint32_t b = (i == 0) ? t : k[i-1];
-
- k[i] = (a << c) | (b >> (32-c));
- }
-}
-
-//-----------------------------------------------------------------------------
-
-void rrot1 ( void * blob, int len, int c )
-{
- int nbits = len * 8;
-
- for(int i = 0; i < c; i++)
- {
- uint32_t bit = getbit(blob,len,0);
-
- rshift1(blob,len,1);
-
- setbit(blob,len,nbits-1,bit);
- }
-}
-
-void rrot8 ( void * blob, int len, int c )
-{
- int nbytes = len;
-
- uint8_t * k = (uint8_t*)blob;
-
- if(c == 0) return;
-
- //----------
-
- int b = c / 8;
- c &= (8-1);
-
- for(int j = 0; j < b; j++)
- {
- uint8_t t = k[0];
-
- for(int i = 0; i < nbytes-1; i++)
- {
- k[i] = k[i+1];
- }
-
- k[nbytes-1] = t;
- }
-
- if(c == 0) return;
-
- //----------
-
- uint8_t t = k[0];
-
- for(int i = 0; i < nbytes; i++)
- {
- uint8_t a = (i == nbytes-1) ? t : k[i+1];
- uint8_t b = k[i];
-
- k[i] = (a << (8-c)) | (b >> c);
- }
-}
-
-void rrot32 ( void * blob, int len, int c )
-{
- assert((len & 3) == 0);
-
- int nbytes = len;
- int ndwords = nbytes/4;
-
- uint32_t * k = (uint32_t*)blob;
-
- if(c == 0) return;
-
- //----------
-
- int b = c / 32;
- c &= (32-1);
-
- for(int j = 0; j < b; j++)
- {
- uint32_t t = k[0];
-
- for(int i = 0; i < ndwords-1; i++)
- {
- k[i] = k[i+1];
- }
-
- k[ndwords-1] = t;
- }
-
- if(c == 0) return;
-
- //----------
-
- uint32_t t = k[0];
-
- for(int i = 0; i < ndwords; i++)
- {
- uint32_t a = (i == ndwords-1) ? t : k[i+1];
- uint32_t b = k[i];
-
- k[i] = (a << (32-c)) | (b >> c);
- }
-}
-
-//-----------------------------------------------------------------------------
-
-uint32_t window1 ( void * blob, int len, int start, int count )
-{
- int nbits = len*8;
- start %= nbits;
-
- uint32_t t = 0;
-
- for(int i = 0; i < count; i++)
- {
- setbit(&t,sizeof(t),i, getbit_wrap(blob,len,start+i));
- }
-
- return t;
-}
-
-uint32_t window8 ( void * blob, int len, int start, int count )
-{
- int nbits = len*8;
- start %= nbits;
-
- uint32_t t = 0;
- uint8_t * k = (uint8_t*)blob;
-
- if(count == 0) return 0;
-
- int c = start & (8-1);
- int d = start / 8;
-
- for(int i = 0; i < 4; i++)
- {
- int ia = (i + d + 1) % len;
- int ib = (i + d + 0) % len;
-
- uint32_t a = k[ia];
- uint32_t b = k[ib];
-
- uint32_t m = (a << (8-c)) | (b >> c);
-
- t |= (m << (8*i));
-
- }
-
- t &= ((1 << count)-1);
-
- return t;
-}
-
-uint32_t window32 ( void * blob, int len, int start, int count )
-{
- int nbits = len*8;
- start %= nbits;
-
- assert((len & 3) == 0);
-
- int ndwords = len / 4;
-
- uint32_t * k = (uint32_t*)blob;
-
- if(count == 0) return 0;
-
- int c = start & (32-1);
- int d = start / 32;
-
- if(c == 0) return (k[d] & ((1 << count) - 1));
-
- int ia = (d + 1) % ndwords;
- int ib = (d + 0) % ndwords;
-
- uint32_t a = k[ia];
- uint32_t b = k[ib];
-
- uint32_t t = (a << (32-c)) | (b >> c);
-
- t &= ((1 << count)-1);
-
- return t;
-}
-
-//-----------------------------------------------------------------------------
-
-bool test_shift ( void )
-{
- Rand r(1123);
-
- int nbits = 64;
- int nbytes = nbits / 8;
- int reps = 10000;
-
- for(int j = 0; j < reps; j++)
- {
- if(j % (reps/10) == 0) printf(".");
-
- uint64_t a = r.rand_u64();
- uint64_t b;
-
- for(int i = 0; i < nbits; i++)
- {
- b = a; lshift1 (&b,nbytes,i); assert(b == (a << i));
- b = a; lshift8 (&b,nbytes,i); assert(b == (a << i));
- b = a; lshift32 (&b,nbytes,i); assert(b == (a << i));
-
- b = a; rshift1 (&b,nbytes,i); assert(b == (a >> i));
- b = a; rshift8 (&b,nbytes,i); assert(b == (a >> i));
- b = a; rshift32 (&b,nbytes,i); assert(b == (a >> i));
-
- b = a; lrot1 (&b,nbytes,i); assert(b == ROTL64(a,i));
- b = a; lrot8 (&b,nbytes,i); assert(b == ROTL64(a,i));
- b = a; lrot32 (&b,nbytes,i); assert(b == ROTL64(a,i));
-
- b = a; rrot1 (&b,nbytes,i); assert(b == ROTR64(a,i));
- b = a; rrot8 (&b,nbytes,i); assert(b == ROTR64(a,i));
- b = a; rrot32 (&b,nbytes,i); assert(b == ROTR64(a,i));
- }
- }
-
- printf("PASS\n");
- return true;
-}
-
-//-----------------------------------------------------------------------------
-
-template < int nbits >
-bool test_window2 ( void )
-{
- Rand r(83874);
-
- struct keytype
- {
- uint8_t bytes[nbits/8];
- };
-
- int nbytes = nbits / 8;
- int reps = 10000;
-
- for(int j = 0; j < reps; j++)
- {
- if(j % (reps/10) == 0) printf(".");
-
- keytype k;
-
- r.rand_p(&k,nbytes);
-
- for(int start = 0; start < nbits; start++)
- {
- for(int count = 0; count < 32; count++)
- {
- uint32_t a = window1(&k,nbytes,start,count);
- uint32_t b = window8(&k,nbytes,start,count);
- uint32_t c = window(&k,nbytes,start,count);
-
- assert(a == b);
- assert(a == c);
- }
- }
- }
-
- printf("PASS %d\n",nbits);
-
- return true;
-}
-
-bool test_window ( void )
-{
- Rand r(48402);
-
- int reps = 10000;
-
- for(int j = 0; j < reps; j++)
- {
- if(j % (reps/10) == 0) printf(".");
-
- int nbits = 64;
- int nbytes = nbits / 8;
-
- uint64_t x = r.rand_u64();
-
- for(int start = 0; start < nbits; start++)
- {
- for(int count = 0; count < 32; count++)
- {
- uint32_t a = (uint32_t)ROTR64(x,start);
- a &= ((1 << count)-1);
-
- uint32_t b = window1 (&x,nbytes,start,count);
- uint32_t c = window8 (&x,nbytes,start,count);
- uint32_t d = window32(&x,nbytes,start,count);
- uint32_t e = window (x,start,count);
-
- assert(a == b);
- assert(a == c);
- assert(a == d);
- assert(a == e);
- }
- }
- }
-
- printf("PASS 64\n");
-
- test_window2<8>();
- test_window2<16>();
- test_window2<24>();
- test_window2<32>();
- test_window2<40>();
- test_window2<48>();
- test_window2<56>();
- test_window2<64>();
-
- return true;
-}
-
-//-----------------------------------------------------------------------------
+#include "Bitvec.h" + +#include "Random.h" + +#include <assert.h> +#include <stdio.h> + +#ifndef DEBUG +#undef assert +void assert ( bool ) +{ +} +#endif + +//---------------------------------------------------------------------------- + +void printbits ( const void * blob, int len ) +{ + const uint8_t * data = (const uint8_t *)blob; + + printf("["); + for(int i = 0; i < len; i++) + { + unsigned char byte = data[i]; + + int hi = (byte >> 4); + int lo = (byte & 0xF); + + if(hi) printf("%01x",hi); + else printf("."); + + if(lo) printf("%01x",lo); + else printf("."); + + if(i != len-1) printf(" "); + } + printf("]"); +} + +void printbits2 ( const uint8_t * k, int nbytes ) +{ + printf("["); + + for(int i = nbytes-1; i >= 0; i--) + { + uint8_t b = k[i]; + + for(int j = 7; j >= 0; j--) + { + uint8_t c = (b & (1 << j)) ? '#' : ' '; + + putc(c,stdout); + } + } + printf("]"); +} + +void printhex32 ( const void * blob, int len ) +{ + assert((len & 3) == 0); + + uint32_t * d = (uint32_t*)blob; + + printf("{ "); + + for(int i = 0; i < len/4; i++) + { + printf("0x%08x, ",d[i]); + } + + printf("}"); +} + +void printbytes ( const void * blob, int len ) +{ + uint8_t * d = (uint8_t*)blob; + + printf("{ "); + + for(int i = 0; i < len; i++) + { + printf("0x%02x, ",d[i]); + } + + printf(" };"); +} + +void printbytes2 ( const void * blob, int len ) +{ + uint8_t * d = (uint8_t*)blob; + + for(int i = 0; i < len; i++) + { + printf("%02x ",d[i]); + } +} + +//----------------------------------------------------------------------------- +// Bit-level manipulation + +// These two are from the "Bit Twiddling Hacks" webpage + +uint32_t popcount ( uint32_t v ) +{ + v = v - ((v >> 1) & 0x55555555); // reuse input as temporary + v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // temp + uint32_t c = ((v + ((v >> 4) & 0xF0F0F0F)) * 0x1010101) >> 24; // count + + return c; +} + +uint32_t parity ( uint32_t v ) +{ + v ^= v >> 1; + v ^= v >> 2; + v = (v & 0x11111111U) * 0x11111111U; + return (v >> 28) 
& 1; +} + +//----------------------------------------------------------------------------- + +uint32_t getbit ( const void * block, int len, uint32_t bit ) +{ + uint8_t * b = (uint8_t*)block; + + int byte = bit >> 3; + bit = bit & 0x7; + + if(byte < len) return (b[byte] >> bit) & 1; + + return 0; +} + +uint32_t getbit_wrap ( const void * block, int len, uint32_t bit ) +{ + uint8_t * b = (uint8_t*)block; + + int byte = bit >> 3; + bit = bit & 0x7; + + byte %= len; + + return (b[byte] >> bit) & 1; +} + +void setbit ( void * block, int len, uint32_t bit ) +{ + uint8_t * b = (uint8_t*)block; + + int byte = bit >> 3; + bit = bit & 0x7; + + if(byte < len) b[byte] |= (1 << bit); +} + +void setbit ( void * block, int len, uint32_t bit, uint32_t val ) +{ + val ? setbit(block,len,bit) : clearbit(block,len,bit); +} + +void clearbit ( void * block, int len, uint32_t bit ) +{ + uint8_t * b = (uint8_t*)block; + + int byte = bit >> 3; + bit = bit & 0x7; + + if(byte < len) b[byte] &= ~(1 << bit); +} + +void flipbit ( void * block, int len, uint32_t bit ) +{ + uint8_t * b = (uint8_t*)block; + + int byte = bit >> 3; + bit = bit & 0x7; + + if(byte < len) b[byte] ^= (1 << bit); +} + +// from the "Bit Twiddling Hacks" webpage + +int countbits ( uint32_t v ) +{ + v = v - ((v >> 1) & 0x55555555); // reuse input as temporary + v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // temp + int c = ((v + ((v >> 4) & 0xF0F0F0F)) * 0x1010101) >> 24; // count + + return c; +} + +//----------------------------------------------------------------------------- + +void lshift1 ( void * blob, int len, int c ) +{ + int nbits = len*8; + + for(int i = nbits-1; i >= 0; i--) + { + setbit(blob,len,i,getbit(blob,len,i-c)); + } +} + + +void lshift8 ( void * blob, int nbytes, int c ) +{ + uint8_t * k = (uint8_t*)blob; + + if(c == 0) return; + + int b = c >> 3; + c &= 7; + + for(int i = nbytes-1; i >= b; i--) + { + k[i] = k[i-b]; + } + + for(int i = b-1; i >= 0; i--) + { + k[i] = 0; + } + + if(c == 0) return; + + 
for(int i = nbytes-1; i >= 0; i--) + { + uint8_t a = k[i]; + uint8_t b = (i == 0) ? 0 : k[i-1]; + + k[i] = (a << c) | (b >> (8-c)); + } +} + +void lshift32 ( void * blob, int len, int c ) +{ + assert((len & 3) == 0); + + int nbytes = len; + int ndwords = nbytes / 4; + + uint32_t * k = reinterpret_cast<uint32_t*>(blob); + + if(c == 0) return; + + //---------- + + int b = c / 32; + c &= (32-1); + + for(int i = ndwords-1; i >= b; i--) + { + k[i] = k[i-b]; + } + + for(int i = b-1; i >= 0; i--) + { + k[i] = 0; + } + + if(c == 0) return; + + for(int i = ndwords-1; i >= 0; i--) + { + uint32_t a = k[i]; + uint32_t b = (i == 0) ? 0 : k[i-1]; + + k[i] = (a << c) | (b >> (32-c)); + } +} + +//----------------------------------------------------------------------------- + +void rshift1 ( void * blob, int len, int c ) +{ + int nbits = len*8; + + for(int i = 0; i < nbits; i++) + { + setbit(blob,len,i,getbit(blob,len,i+c)); + } +} + +void rshift8 ( void * blob, int nbytes, int c ) +{ + uint8_t * k = (uint8_t*)blob; + + if(c == 0) return; + + int b = c >> 3; + c &= 7; + + for(int i = 0; i < nbytes-b; i++) + { + k[i] = k[i+b]; + } + + for(int i = nbytes-b; i < nbytes; i++) + { + k[i] = 0; + } + + if(c == 0) return; + + for(int i = 0; i < nbytes; i++) + { + uint8_t a = (i == nbytes-1) ? 0 : k[i+1]; + uint8_t b = k[i]; + + k[i] = (a << (8-c) ) | (b >> c); + } +} + +void rshift32 ( void * blob, int len, int c ) +{ + assert((len & 3) == 0); + + int nbytes = len; + int ndwords = nbytes / 4; + + uint32_t * k = (uint32_t*)blob; + + //---------- + + if(c == 0) return; + + int b = c / 32; + c &= (32-1); + + for(int i = 0; i < ndwords-b; i++) + { + k[i] = k[i+b]; + } + + for(int i = ndwords-b; i < ndwords; i++) + { + k[i] = 0; + } + + if(c == 0) return; + + for(int i = 0; i < ndwords; i++) + { + uint32_t a = (i == ndwords-1) ? 
0 : k[i+1]; + uint32_t b = k[i]; + + k[i] = (a << (32-c) ) | (b >> c); + } +} + +//----------------------------------------------------------------------------- + +void lrot1 ( void * blob, int len, int c ) +{ + int nbits = len * 8; + + for(int i = 0; i < c; i++) + { + uint32_t bit = getbit(blob,len,nbits-1); + + lshift1(blob,len,1); + + setbit(blob,len,0,bit); + } +} + +void lrot8 ( void * blob, int len, int c ) +{ + int nbytes = len; + + uint8_t * k = (uint8_t*)blob; + + if(c == 0) return; + + //---------- + + int b = c / 8; + c &= (8-1); + + for(int j = 0; j < b; j++) + { + uint8_t t = k[nbytes-1]; + + for(int i = nbytes-1; i > 0; i--) + { + k[i] = k[i-1]; + } + + k[0] = t; + } + + uint8_t t = k[nbytes-1]; + + if(c == 0) return; + + for(int i = nbytes-1; i >= 0; i--) + { + uint8_t a = k[i]; + uint8_t b = (i == 0) ? t : k[i-1]; + + k[i] = (a << c) | (b >> (8-c)); + } +} + +void lrot32 ( void * blob, int len, int c ) +{ + assert((len & 3) == 0); + + int nbytes = len; + int ndwords = nbytes/4; + + uint32_t * k = (uint32_t*)blob; + + if(c == 0) return; + + //---------- + + int b = c / 32; + c &= (32-1); + + for(int j = 0; j < b; j++) + { + uint32_t t = k[ndwords-1]; + + for(int i = ndwords-1; i > 0; i--) + { + k[i] = k[i-1]; + } + + k[0] = t; + } + + uint32_t t = k[ndwords-1]; + + if(c == 0) return; + + for(int i = ndwords-1; i >= 0; i--) + { + uint32_t a = k[i]; + uint32_t b = (i == 0) ? 
t : k[i-1]; + + k[i] = (a << c) | (b >> (32-c)); + } +} + +//----------------------------------------------------------------------------- + +void rrot1 ( void * blob, int len, int c ) +{ + int nbits = len * 8; + + for(int i = 0; i < c; i++) + { + uint32_t bit = getbit(blob,len,0); + + rshift1(blob,len,1); + + setbit(blob,len,nbits-1,bit); + } +} + +void rrot8 ( void * blob, int len, int c ) +{ + int nbytes = len; + + uint8_t * k = (uint8_t*)blob; + + if(c == 0) return; + + //---------- + + int b = c / 8; + c &= (8-1); + + for(int j = 0; j < b; j++) + { + uint8_t t = k[0]; + + for(int i = 0; i < nbytes-1; i++) + { + k[i] = k[i+1]; + } + + k[nbytes-1] = t; + } + + if(c == 0) return; + + //---------- + + uint8_t t = k[0]; + + for(int i = 0; i < nbytes; i++) + { + uint8_t a = (i == nbytes-1) ? t : k[i+1]; + uint8_t b = k[i]; + + k[i] = (a << (8-c)) | (b >> c); + } +} + +void rrot32 ( void * blob, int len, int c ) +{ + assert((len & 3) == 0); + + int nbytes = len; + int ndwords = nbytes/4; + + uint32_t * k = (uint32_t*)blob; + + if(c == 0) return; + + //---------- + + int b = c / 32; + c &= (32-1); + + for(int j = 0; j < b; j++) + { + uint32_t t = k[0]; + + for(int i = 0; i < ndwords-1; i++) + { + k[i] = k[i+1]; + } + + k[ndwords-1] = t; + } + + if(c == 0) return; + + //---------- + + uint32_t t = k[0]; + + for(int i = 0; i < ndwords; i++) + { + uint32_t a = (i == ndwords-1) ? 
t : k[i+1]; + uint32_t b = k[i]; + + k[i] = (a << (32-c)) | (b >> c); + } +} + +//----------------------------------------------------------------------------- + +uint32_t window1 ( void * blob, int len, int start, int count ) +{ + int nbits = len*8; + start %= nbits; + + uint32_t t = 0; + + for(int i = 0; i < count; i++) + { + setbit(&t,sizeof(t),i, getbit_wrap(blob,len,start+i)); + } + + return t; +} + +uint32_t window8 ( void * blob, int len, int start, int count ) +{ + int nbits = len*8; + start %= nbits; + + uint32_t t = 0; + uint8_t * k = (uint8_t*)blob; + + if(count == 0) return 0; + + int c = start & (8-1); + int d = start / 8; + + for(int i = 0; i < 4; i++) + { + int ia = (i + d + 1) % len; + int ib = (i + d + 0) % len; + + uint32_t a = k[ia]; + uint32_t b = k[ib]; + + uint32_t m = (a << (8-c)) | (b >> c); + + t |= (m << (8*i)); + + } + + t &= ((1 << count)-1); + + return t; +} + +uint32_t window32 ( void * blob, int len, int start, int count ) +{ + int nbits = len*8; + start %= nbits; + + assert((len & 3) == 0); + + int ndwords = len / 4; + + uint32_t * k = (uint32_t*)blob; + + if(count == 0) return 0; + + int c = start & (32-1); + int d = start / 32; + + if(c == 0) return (k[d] & ((1 << count) - 1)); + + int ia = (d + 1) % ndwords; + int ib = (d + 0) % ndwords; + + uint32_t a = k[ia]; + uint32_t b = k[ib]; + + uint32_t t = (a << (32-c)) | (b >> c); + + t &= ((1 << count)-1); + + return t; +} + +//----------------------------------------------------------------------------- + +bool test_shift ( void ) +{ + Rand r(1123); + + int nbits = 64; + int nbytes = nbits / 8; + int reps = 10000; + + for(int j = 0; j < reps; j++) + { + if(j % (reps/10) == 0) printf("."); + + uint64_t a = r.rand_u64(); + uint64_t b; + + for(int i = 0; i < nbits; i++) + { + b = a; lshift1 (&b,nbytes,i); assert(b == (a << i)); + b = a; lshift8 (&b,nbytes,i); assert(b == (a << i)); + b = a; lshift32 (&b,nbytes,i); assert(b == (a << i)); + + b = a; rshift1 (&b,nbytes,i); assert(b == (a >> 
i)); + b = a; rshift8 (&b,nbytes,i); assert(b == (a >> i)); + b = a; rshift32 (&b,nbytes,i); assert(b == (a >> i)); + + b = a; lrot1 (&b,nbytes,i); assert(b == ROTL64(a,i)); + b = a; lrot8 (&b,nbytes,i); assert(b == ROTL64(a,i)); + b = a; lrot32 (&b,nbytes,i); assert(b == ROTL64(a,i)); + + b = a; rrot1 (&b,nbytes,i); assert(b == ROTR64(a,i)); + b = a; rrot8 (&b,nbytes,i); assert(b == ROTR64(a,i)); + b = a; rrot32 (&b,nbytes,i); assert(b == ROTR64(a,i)); + } + } + + printf("PASS\n"); + return true; +} + +//----------------------------------------------------------------------------- + +template < int nbits > +bool test_window2 ( void ) +{ + Rand r(83874); + + struct keytype + { + uint8_t bytes[nbits/8]; + }; + + int nbytes = nbits / 8; + int reps = 10000; + + for(int j = 0; j < reps; j++) + { + if(j % (reps/10) == 0) printf("."); + + keytype k; + + r.rand_p(&k,nbytes); + + for(int start = 0; start < nbits; start++) + { + for(int count = 0; count < 32; count++) + { + uint32_t a = window1(&k,nbytes,start,count); + uint32_t b = window8(&k,nbytes,start,count); + uint32_t c = window(&k,nbytes,start,count); + + assert(a == b); + assert(a == c); + } + } + } + + printf("PASS %d\n",nbits); + + return true; +} + +bool test_window ( void ) +{ + Rand r(48402); + + int reps = 10000; + + for(int j = 0; j < reps; j++) + { + if(j % (reps/10) == 0) printf("."); + + int nbits = 64; + int nbytes = nbits / 8; + + uint64_t x = r.rand_u64(); + + for(int start = 0; start < nbits; start++) + { + for(int count = 0; count < 32; count++) + { + uint32_t a = (uint32_t)ROTR64(x,start); + a &= ((1 << count)-1); + + uint32_t b = window1 (&x,nbytes,start,count); + uint32_t c = window8 (&x,nbytes,start,count); + uint32_t d = window32(&x,nbytes,start,count); + uint32_t e = window (x,start,count); + + assert(a == b); + assert(a == c); + assert(a == d); + assert(a == e); + } + } + } + + printf("PASS 64\n"); + + test_window2<8>(); + test_window2<16>(); + test_window2<24>(); + test_window2<32>(); + 
test_window2<40>(); + test_window2<48>(); + test_window2<56>(); + test_window2<64>(); + + return true; +} + +//----------------------------------------------------------------------------- @@ -1,245 +1,245 @@ -#pragma once
-
-#include "Platform.h"
-
-#include <vector>
-
-//-----------------------------------------------------------------------------
-
-void printbits ( const void * blob, int len );
-void printhex32 ( const void * blob, int len );
-void printbytes ( const void * blob, int len );
-void printbytes2 ( const void * blob, int len );
-
-uint32_t popcount ( uint32_t v );
-uint32_t parity ( uint32_t v );
-
-uint32_t getbit ( const void * blob, int len, uint32_t bit );
-uint32_t getbit_wrap ( const void * blob, int len, uint32_t bit );
-
-void setbit ( void * blob, int len, uint32_t bit );
-void setbit ( void * blob, int len, uint32_t bit, uint32_t val );
-
-void clearbit ( void * blob, int len, uint32_t bit );
-
-void flipbit ( void * blob, int len, uint32_t bit );
-
-int countbits ( uint32_t v );
-int countbits ( std::vector<uint32_t> & v );
-
-int countbits ( const void * blob, int len );
-
-void invert ( std::vector<uint32_t> & v );
-
-//----------
-
-template< typename T >
-inline uint32_t getbit ( T & blob, uint32_t bit )
-{
- return getbit(&blob,sizeof(blob),bit);
-}
-
-template<> inline uint32_t getbit ( uint32_t & blob, uint32_t bit ) { return (blob >> (bit & 31)) & 1; }
-template<> inline uint32_t getbit ( uint64_t & blob, uint32_t bit ) { return (blob >> (bit & 63)) & 1; }
-
-//----------
-
-template< typename T >
-inline void setbit ( T & blob, uint32_t bit )
-{
- return setbit(&blob,sizeof(blob),bit);
-}
-
-template<> inline void setbit ( uint32_t & blob, uint32_t bit ) { blob |= uint32_t(1) << (bit & 31); }
-template<> inline void setbit ( uint64_t & blob, uint32_t bit ) { blob |= uint64_t(1) << (bit & 63); }
-
-//----------
-
-template< typename T >
-inline void flipbit ( T & blob, uint32_t bit )
-{
- flipbit(&blob,sizeof(blob),bit);
-}
-
-template<> inline void flipbit ( uint32_t & blob, uint32_t bit ) { bit &= 31; blob ^= (uint32_t(1) << bit); }
-template<> inline void flipbit ( uint64_t & blob, uint32_t bit ) { bit &= 63; blob ^= (uint64_t(1) << bit); }
-
-//-----------------------------------------------------------------------------
-// Left and right shift of blobs. The shift(N) versions work on chunks of N
-// bits at a time (faster)
-
-void lshift1 ( void * blob, int len, int c );
-void lshift8 ( void * blob, int len, int c );
-void lshift32 ( void * blob, int len, int c );
-
-void rshift1 ( void * blob, int len, int c );
-void rshift8 ( void * blob, int len, int c );
-void rshift32 ( void * blob, int len, int c );
-
-inline void lshift ( void * blob, int len, int c )
-{
- if((len & 3) == 0)
- {
- lshift32(blob,len,c);
- }
- else
- {
- lshift8(blob,len,c);
- }
-}
-
-inline void rshift ( void * blob, int len, int c )
-{
- if((len & 3) == 0)
- {
- rshift32(blob,len,c);
- }
- else
- {
- rshift8(blob,len,c);
- }
-}
-
-template < typename T >
-inline void lshift ( T & blob, int c )
-{
- if((sizeof(T) & 3) == 0)
- {
- lshift32(&blob,sizeof(T),c);
- }
- else
- {
- lshift8(&blob,sizeof(T),c);
- }
-}
-
-template < typename T >
-inline void rshift ( T & blob, int c )
-{
- if((sizeof(T) & 3) == 0)
- {
- lshift32(&blob,sizeof(T),c);
- }
- else
- {
- lshift8(&blob,sizeof(T),c);
- }
-}
-
-template<> inline void lshift ( uint32_t & blob, int c ) { blob <<= c; }
-template<> inline void lshift ( uint64_t & blob, int c ) { blob <<= c; }
-template<> inline void rshift ( uint32_t & blob, int c ) { blob >>= c; }
-template<> inline void rshift ( uint64_t & blob, int c ) { blob >>= c; }
-
-//-----------------------------------------------------------------------------
-// Left and right rotate of blobs. The rot(N) versions work on chunks of N
-// bits at a time (faster)
-
-void lrot1 ( void * blob, int len, int c );
-void lrot8 ( void * blob, int len, int c );
-void lrot32 ( void * blob, int len, int c );
-
-void rrot1 ( void * blob, int len, int c );
-void rrot8 ( void * blob, int len, int c );
-void rrot32 ( void * blob, int len, int c );
-
-inline void lrot ( void * blob, int len, int c )
-{
- if((len & 3) == 0)
- {
- return lrot32(blob,len,c);
- }
- else
- {
- return lrot8(blob,len,c);
- }
-}
-
-inline void rrot ( void * blob, int len, int c )
-{
- if((len & 3) == 0)
- {
- return rrot32(blob,len,c);
- }
- else
- {
- return rrot8(blob,len,c);
- }
-}
-
-template < typename T >
-inline void lrot ( T & blob, int c )
-{
- if((sizeof(T) & 3) == 0)
- {
- return lrot32(&blob,sizeof(T),c);
- }
- else
- {
- return lrot8(&blob,sizeof(T),c);
- }
-}
-
-template < typename T >
-inline void rrot ( T & blob, int c )
-{
- if((sizeof(T) & 3) == 0)
- {
- return rrot32(&blob,sizeof(T),c);
- }
- else
- {
- return rrot8(&blob,sizeof(T),c);
- }
-}
-
-template<> inline void lrot ( uint32_t & blob, int c ) { blob = ROTL32(blob,c); }
-template<> inline void lrot ( uint64_t & blob, int c ) { blob = ROTL64(blob,c); }
-template<> inline void rrot ( uint32_t & blob, int c ) { blob = ROTR32(blob,c); }
-template<> inline void rrot ( uint64_t & blob, int c ) { blob = ROTR64(blob,c); }
-
-//-----------------------------------------------------------------------------
-// Bit-windowing functions - select some N-bit subset of the input blob
-
-uint32_t window1 ( void * blob, int len, int start, int count );
-uint32_t window8 ( void * blob, int len, int start, int count );
-uint32_t window32 ( void * blob, int len, int start, int count );
-
-inline uint32_t window ( void * blob, int len, int start, int count )
-{
- if(len & 3)
- {
- return window8(blob,len,start,count);
- }
- else
- {
- return window32(blob,len,start,count);
- }
-}
-
-template < typename T >
-inline uint32_t window ( T & blob, int start, int count )
-{
- if((sizeof(T) & 3) == 0)
- {
- return window32(&blob,sizeof(T),start,count);
- }
- else
- {
- return window8(&blob,sizeof(T),start,count);
- }
-}
-
-template<>
-inline uint32_t window ( uint32_t & blob, int start, int count )
-{
- return ROTR32(blob,start) & ((1<<count)-1);
-}
-
-template<>
-inline uint32_t window ( uint64_t & blob, int start, int count )
-{
- return (uint32_t)ROTR64(blob,start) & ((1<<count)-1);
-}
-
-//-----------------------------------------------------------------------------
+#pragma once + +#include "Platform.h" + +#include <vector> + +//----------------------------------------------------------------------------- + +void printbits ( const void * blob, int len ); +void printhex32 ( const void * blob, int len ); +void printbytes ( const void * blob, int len ); +void printbytes2 ( const void * blob, int len ); + +uint32_t popcount ( uint32_t v ); +uint32_t parity ( uint32_t v ); + +uint32_t getbit ( const void * blob, int len, uint32_t bit ); +uint32_t getbit_wrap ( const void * blob, int len, uint32_t bit ); + +void setbit ( void * blob, int len, uint32_t bit ); +void setbit ( void * blob, int len, uint32_t bit, uint32_t val ); + +void clearbit ( void * blob, int len, uint32_t bit ); + +void flipbit ( void * blob, int len, uint32_t bit ); + +int countbits ( uint32_t v ); +int countbits ( std::vector<uint32_t> & v ); + +int countbits ( const void * blob, int len ); + +void invert ( std::vector<uint32_t> & v ); + +//---------- + +template< typename T > +inline uint32_t getbit ( T & blob, uint32_t bit ) +{ + return getbit(&blob,sizeof(blob),bit); +} + +template<> inline uint32_t getbit ( uint32_t & blob, uint32_t bit ) { return (blob >> (bit & 31)) & 1; } +template<> inline uint32_t getbit ( uint64_t & blob, uint32_t bit ) { return (blob >> (bit & 63)) & 1; } + +//---------- + +template< typename T > +inline void setbit ( T & blob, uint32_t bit ) +{ + return setbit(&blob,sizeof(blob),bit); +} + +template<> inline void setbit ( uint32_t & blob, uint32_t bit ) { blob |= uint32_t(1) << (bit & 31); } +template<> inline void setbit ( uint64_t & blob, uint32_t bit ) { blob |= uint64_t(1) << (bit & 63); } + +//---------- + +template< typename T > +inline void flipbit ( T & blob, uint32_t bit ) +{ + flipbit(&blob,sizeof(blob),bit); +} + +template<> inline void flipbit ( uint32_t & blob, uint32_t bit ) { bit &= 31; blob ^= (uint32_t(1) << bit); } +template<> inline void flipbit ( uint64_t & blob, uint32_t bit ) { bit &= 63; blob ^= (uint64_t(1) << 
bit); } + +//----------------------------------------------------------------------------- +// Left and right shift of blobs. The shift(N) versions work on chunks of N +// bits at a time (faster) + +void lshift1 ( void * blob, int len, int c ); +void lshift8 ( void * blob, int len, int c ); +void lshift32 ( void * blob, int len, int c ); + +void rshift1 ( void * blob, int len, int c ); +void rshift8 ( void * blob, int len, int c ); +void rshift32 ( void * blob, int len, int c ); + +inline void lshift ( void * blob, int len, int c ) +{ + if((len & 3) == 0) + { + lshift32(blob,len,c); + } + else + { + lshift8(blob,len,c); + } +} + +inline void rshift ( void * blob, int len, int c ) +{ + if((len & 3) == 0) + { + rshift32(blob,len,c); + } + else + { + rshift8(blob,len,c); + } +} + +template < typename T > +inline void lshift ( T & blob, int c ) +{ + if((sizeof(T) & 3) == 0) + { + lshift32(&blob,sizeof(T),c); + } + else + { + lshift8(&blob,sizeof(T),c); + } +} + +template < typename T > +inline void rshift ( T & blob, int c ) +{ + if((sizeof(T) & 3) == 0) + { + lshift32(&blob,sizeof(T),c); + } + else + { + lshift8(&blob,sizeof(T),c); + } +} + +template<> inline void lshift ( uint32_t & blob, int c ) { blob <<= c; } +template<> inline void lshift ( uint64_t & blob, int c ) { blob <<= c; } +template<> inline void rshift ( uint32_t & blob, int c ) { blob >>= c; } +template<> inline void rshift ( uint64_t & blob, int c ) { blob >>= c; } + +//----------------------------------------------------------------------------- +// Left and right rotate of blobs. 
The rot(N) versions work on chunks of N +// bits at a time (faster) + +void lrot1 ( void * blob, int len, int c ); +void lrot8 ( void * blob, int len, int c ); +void lrot32 ( void * blob, int len, int c ); + +void rrot1 ( void * blob, int len, int c ); +void rrot8 ( void * blob, int len, int c ); +void rrot32 ( void * blob, int len, int c ); + +inline void lrot ( void * blob, int len, int c ) +{ + if((len & 3) == 0) + { + return lrot32(blob,len,c); + } + else + { + return lrot8(blob,len,c); + } +} + +inline void rrot ( void * blob, int len, int c ) +{ + if((len & 3) == 0) + { + return rrot32(blob,len,c); + } + else + { + return rrot8(blob,len,c); + } +} + +template < typename T > +inline void lrot ( T & blob, int c ) +{ + if((sizeof(T) & 3) == 0) + { + return lrot32(&blob,sizeof(T),c); + } + else + { + return lrot8(&blob,sizeof(T),c); + } +} + +template < typename T > +inline void rrot ( T & blob, int c ) +{ + if((sizeof(T) & 3) == 0) + { + return rrot32(&blob,sizeof(T),c); + } + else + { + return rrot8(&blob,sizeof(T),c); + } +} + +template<> inline void lrot ( uint32_t & blob, int c ) { blob = ROTL32(blob,c); } +template<> inline void lrot ( uint64_t & blob, int c ) { blob = ROTL64(blob,c); } +template<> inline void rrot ( uint32_t & blob, int c ) { blob = ROTR32(blob,c); } +template<> inline void rrot ( uint64_t & blob, int c ) { blob = ROTR64(blob,c); } + +//----------------------------------------------------------------------------- +// Bit-windowing functions - select some N-bit subset of the input blob + +uint32_t window1 ( void * blob, int len, int start, int count ); +uint32_t window8 ( void * blob, int len, int start, int count ); +uint32_t window32 ( void * blob, int len, int start, int count ); + +inline uint32_t window ( void * blob, int len, int start, int count ) +{ + if(len & 3) + { + return window8(blob,len,start,count); + } + else + { + return window32(blob,len,start,count); + } +} + +template < typename T > +inline uint32_t window ( T & blob, 
int start, int count ) +{ + if((sizeof(T) & 3) == 0) + { + return window32(&blob,sizeof(T),start,count); + } + else + { + return window8(&blob,sizeof(T),start,count); + } +} + +template<> +inline uint32_t window ( uint32_t & blob, int start, int count ) +{ + return ROTR32(blob,start) & ((1<<count)-1); +} + +template<> +inline uint32_t window ( uint64_t & blob, int start, int count ) +{ + return (uint32_t)ROTR64(blob,start) & ((1<<count)-1); +} + +//----------------------------------------------------------------------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b5df45..d04afdf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,7 @@ add_library( AvalancheTest.cpp Bitslice.cpp Bitvec.cpp + CityTest.cpp City.cpp crc.cpp DifferentialTest.cpp @@ -24,6 +25,8 @@ add_library( Random.cpp sha1.cpp SpeedTest.cpp + Spooky.cpp + SpookyTest.cpp Stats.cpp SuperFastHash.cpp Types.cpp @@ -1,321 +1,465 @@ -// Copyright (c) 2011 Google, Inc.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-//
-// CityHash Version 1, by Geoff Pike and Jyrki Alakuijala
-//
-// This file provides CityHash64() and related functions.
-//
-// It's probably possible to create even faster hash functions by
-// writing a program that systematically explores some of the space of
-// possible hash functions, by using SIMD instructions, or by
-// compromising on hash quality.
-
-#include "City.h"
-
-#include <algorithm>
-
-using namespace std;
-
-#define UNALIGNED_LOAD64(p) (*(const uint64*)(p))
-#define UNALIGNED_LOAD32(p) (*(const uint32*)(p))
-
-#if !defined(LIKELY)
-#if defined(__GNUC__)
-#define LIKELY(x) (__builtin_expect(!!(x), 1))
-#else
-#define LIKELY(x) (x)
-#endif
-#endif
-
-// Some primes between 2^63 and 2^64 for various uses.
-static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
-static const uint64 k1 = 0xb492b66fbe98f273ULL;
-static const uint64 k2 = 0x9ae16a3b2f90404fULL;
-static const uint64 k3 = 0xc949d7c7509e6557ULL;
-
-// Bitwise right rotate. Normally this will compile to a single
-// instruction, especially if the shift is a manifest constant.
-static uint64 Rotate(uint64 val, int shift) {
- // Avoid shifting by 64: doing so yields an undefined result.
- return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
-}
-
-// Equivalent to Rotate(), but requires the second arg to be non-zero.
-// On x86-64, and probably others, it's possible for this to compile
-// to a single instruction if both args are already in registers.
-static uint64 RotateByAtLeast1(uint64 val, int shift) {
- return (val >> shift) | (val << (64 - shift));
-}
-
-static uint64 ShiftMix(uint64 val) {
- return val ^ (val >> 47);
-}
-
-static uint64 HashLen16(uint64 u, uint64 v) {
- return Hash128to64(uint128(u, v));
-}
-
-static uint64 HashLen0to16(const char *s, size_t len) {
- if (len > 8) {
- uint64 a = UNALIGNED_LOAD64(s);
- uint64 b = UNALIGNED_LOAD64(s + len - 8);
- return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b;
- }
- if (len >= 4) {
- uint64 a = UNALIGNED_LOAD32(s);
- return HashLen16(len + (a << 3), UNALIGNED_LOAD32(s + len - 4));
- }
- if (len > 0) {
- uint8 a = s[0];
- uint8 b = s[len >> 1];
- uint8 c = s[len - 1];
- uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8);
- uint32 z = len + (static_cast<uint32>(c) << 2);
- return ShiftMix(y * k2 ^ z * k3) * k2;
- }
- return k2;
-}
-
-// This probably works well for 16-byte strings as well, but it may be overkill
-// in that case.
-static uint64 HashLen17to32(const char *s, size_t len) {
- uint64 a = UNALIGNED_LOAD64(s) * k1;
- uint64 b = UNALIGNED_LOAD64(s + 8);
- uint64 c = UNALIGNED_LOAD64(s + len - 8) * k2;
- uint64 d = UNALIGNED_LOAD64(s + len - 16) * k0;
- return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d,
- a + Rotate(b ^ k3, 20) - c + len);
-}
-
-// Return a 16-byte hash for 48 bytes. Quick and dirty.
-// Callers do best to use "random-looking" values for a and b.
-static pair<uint64, uint64> WeakHashLen32WithSeeds(
- uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {
- a += w;
- b = Rotate(b + a + z, 21);
- uint64 c = a;
- a += x;
- a += y;
- b += Rotate(a, 44);
- return make_pair(a + z, b + c);
-}
-
-// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
-static pair<uint64, uint64> WeakHashLen32WithSeeds(
- const char* s, uint64 a, uint64 b) {
- return WeakHashLen32WithSeeds(UNALIGNED_LOAD64(s),
- UNALIGNED_LOAD64(s + 8),
- UNALIGNED_LOAD64(s + 16),
- UNALIGNED_LOAD64(s + 24),
- a,
- b);
-}
-
-// Return an 8-byte hash for 33 to 64 bytes.
-static uint64 HashLen33to64(const char *s, size_t len) {
- uint64 z = UNALIGNED_LOAD64(s + 24);
- uint64 a = UNALIGNED_LOAD64(s) + (len + UNALIGNED_LOAD64(s + len - 16)) * k0;
- uint64 b = Rotate(a + z, 52);
- uint64 c = Rotate(a, 37);
- a += UNALIGNED_LOAD64(s + 8);
- c += Rotate(a, 7);
- a += UNALIGNED_LOAD64(s + 16);
- uint64 vf = a + z;
- uint64 vs = b + Rotate(a, 31) + c;
- a = UNALIGNED_LOAD64(s + 16) + UNALIGNED_LOAD64(s + len - 32);
- z = UNALIGNED_LOAD64(s + len - 8);
- b = Rotate(a + z, 52);
- c = Rotate(a, 37);
- a += UNALIGNED_LOAD64(s + len - 24);
- c += Rotate(a, 7);
- a += UNALIGNED_LOAD64(s + len - 16);
- uint64 wf = a + z;
- uint64 ws = b + Rotate(a, 31) + c;
- uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
- return ShiftMix(r * k0 + vs) * k2;
-}
-
-uint64 CityHash64(const char *s, size_t len) {
- if (len <= 32) {
- if (len <= 16) {
- return HashLen0to16(s, len);
- } else {
- return HashLen17to32(s, len);
- }
- } else if (len <= 64) {
- return HashLen33to64(s, len);
- }
-
- // For strings over 64 bytes we hash the end first, and then as we
- // loop we keep 56 bytes of state: v, w, x, y, and z.
- uint64 x = UNALIGNED_LOAD64(s);
- uint64 y = UNALIGNED_LOAD64(s + len - 16) ^ k1;
- uint64 z = UNALIGNED_LOAD64(s + len - 56) ^ k0;
- pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, y);
- pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, len * k1, k0);
- z += ShiftMix(v.second) * k1;
- x = Rotate(z + x, 39) * k1;
- y = Rotate(y, 33) * k1;
-
- // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
- len = (len - 1) & ~static_cast<size_t>(63);
- do {
- x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;
- y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;
- x ^= w.second;
- y ^= v.first;
- z = Rotate(z ^ w.first, 33);
- v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
- w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
- std::swap(z, x);
- s += 64;
- len -= 64;
- } while (len != 0);
- return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
- HashLen16(v.second, w.second) + x);
-}
-
-uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {
- return CityHash64WithSeeds(s, len, k2, seed);
-}
-
-uint64 CityHash64WithSeeds(const char *s, size_t len,
- uint64 seed0, uint64 seed1) {
- return HashLen16(CityHash64(s, len) - seed0, seed1);
-}
-
-// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
-// of any length representable in ssize_t. Based on City and Murmur.
-static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
- uint64 a = Uint128Low64(seed);
- uint64 b = Uint128High64(seed);
- uint64 c = 0;
- uint64 d = 0;
- ssize_t l = len - 16;
- if (l <= 0) { // len <= 16
- c = b * k1 + HashLen0to16(s, len);
- d = Rotate(a + (len >= 8 ? UNALIGNED_LOAD64(s) : c), 32);
- } else { // len > 16
- c = HashLen16(UNALIGNED_LOAD64(s + len - 8) + k1, a);
- d = HashLen16(b + len, c + UNALIGNED_LOAD64(s + len - 16));
- a += d;
- do {
- a ^= ShiftMix(UNALIGNED_LOAD64(s) * k1) * k1;
- a *= k1;
- b ^= a;
- c ^= ShiftMix(UNALIGNED_LOAD64(s + 8) * k1) * k1;
- c *= k1;
- d ^= c;
- s += 16;
- l -= 16;
- } while (l > 0);
- }
- a = HashLen16(a, c);
- b = HashLen16(d, b);
- return uint128(a ^ b, HashLen16(b, a));
-}
-
-uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {
- if (len < 128) {
- return CityMurmur(s, len, seed);
- }
-
- // We expect len >= 128 to be the common case. Keep 56 bytes of state:
- // v, w, x, y, and z.
- pair<uint64, uint64> v, w;
- uint64 x = Uint128Low64(seed);
- uint64 y = Uint128High64(seed);
- uint64 z = len * k1;
- v.first = Rotate(y ^ k1, 49) * k1 + UNALIGNED_LOAD64(s);
- v.second = Rotate(v.first, 42) * k1 + UNALIGNED_LOAD64(s + 8);
- w.first = Rotate(y + z, 35) * k1 + x;
- w.second = Rotate(x + UNALIGNED_LOAD64(s + 88), 53) * k1;
-
- // This is the same inner loop as CityHash64(), manually unrolled.
- do {
- x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;
- y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;
- x ^= w.second;
- y ^= v.first;
- z = Rotate(z ^ w.first, 33);
- v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
- w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
- std::swap(z, x);
- s += 64;
- x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;
- y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;
- x ^= w.second;
- y ^= v.first;
- z = Rotate(z ^ w.first, 33);
- v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
- w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
- std::swap(z, x);
- s += 64;
- len -= 128;
- } while (LIKELY(len >= 128));
- y += Rotate(w.first, 37) * k0 + z;
- x += Rotate(v.first + z, 49) * k0;
- // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
- for (size_t tail_done = 0; tail_done < len; ) {
- tail_done += 32;
- y = Rotate(y - x, 42) * k0 + v.second;
- w.first += UNALIGNED_LOAD64(s + len - tail_done + 16);
- x = Rotate(x, 49) * k0 + w.first;
- w.first += v.first;
- v = WeakHashLen32WithSeeds(s + len - tail_done, v.first, v.second);
- }
- // At this point our 48 bytes of state should contain more than
- // enough information for a strong 128-bit hash. We use two
- // different 48-byte-to-8-byte hashes to get a 16-byte final result.
- x = HashLen16(x, v.first);
- y = HashLen16(y, w.first);
- return uint128(HashLen16(x + v.second, w.second) + y,
- HashLen16(x + w.second, y + v.second));
-}
-
-uint128 CityHash128(const char *s, size_t len) {
- if (len >= 16) {
- return CityHash128WithSeed(s + 16,
- len - 16,
- uint128(UNALIGNED_LOAD64(s) ^ k3,
- UNALIGNED_LOAD64(s + 8)));
- } else if (len >= 8) {
- return CityHash128WithSeed(NULL,
- 0,
- uint128(UNALIGNED_LOAD64(s) ^ (len * k0),
- UNALIGNED_LOAD64(s + len - 8) ^ k1));
- } else {
- return CityHash128WithSeed(s, len, uint128(k0, k1));
- }
-}
-
-void CityHash64_test ( const void * key, int len, uint32_t seed, void * out )
-{
- *(uint64*)out = CityHash64WithSeed((const char *)key,len,seed);
-}
-
-void CityHash128_test ( const void * key, int len, uint32_t seed, void * out )
-{
- uint128 s(0,0);
-
- s.first = seed;
-
- *(uint128*)out = CityHash128WithSeed((const char*)key,len,s);
-}
+// Copyright (c) 2011 Google, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// CityHash, by Geoff Pike and Jyrki Alakuijala +// +// This file provides CityHash64() and related functions. +// +// It's probably possible to create even faster hash functions by +// writing a program that systematically explores some of the space of +// possible hash functions, by using SIMD instructions, or by +// compromising on hash quality. 
+ +#include "City.h" + +#include <algorithm> +#include <string.h> // for memcpy and memset + +using namespace std; + +static uint64 UNALIGNED_LOAD64(const char *p) { + uint64 result; + memcpy(&result, p, sizeof(result)); + return result; +} + +static uint32 UNALIGNED_LOAD32(const char *p) { + uint32 result; + memcpy(&result, p, sizeof(result)); + return result; +} + +#ifndef __BIG_ENDIAN__ + +#define uint32_in_expected_order(x) (x) +#define uint64_in_expected_order(x) (x) + +#else + +#ifdef _MSC_VER +#include <stdlib.h> +#define bswap_32(x) _byteswap_ulong(x) +#define bswap_64(x) _byteswap_uint64(x) + +#elif defined(__APPLE__) +// Mac OS X / Darwin features +#include <libkern/OSByteOrder.h> +#define bswap_32(x) OSSwapInt32(x) +#define bswap_64(x) OSSwapInt64(x) + +#else +#include <byteswap.h> +#endif + +#define uint32_in_expected_order(x) (bswap_32(x)) +#define uint64_in_expected_order(x) (bswap_64(x)) + +#endif // __BIG_ENDIAN__ + +#if !defined(LIKELY) +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#define LIKELY(x) (__builtin_expect(!!(x), 1)) +#else +#define LIKELY(x) (x) +#endif +#endif + +static uint64 Fetch64(const char *p) { + return uint64_in_expected_order(UNALIGNED_LOAD64(p)); +} + +static uint32 Fetch32(const char *p) { + return uint32_in_expected_order(UNALIGNED_LOAD32(p)); +} + +// Some primes between 2^63 and 2^64 for various uses. +static const uint64 k0 = 0xc3a5c85c97cb3127ULL; +static const uint64 k1 = 0xb492b66fbe98f273ULL; +static const uint64 k2 = 0x9ae16a3b2f90404fULL; +static const uint64 k3 = 0xc949d7c7509e6557ULL; + +// Bitwise right rotate. Normally this will compile to a single +// instruction, especially if the shift is a manifest constant. +static uint64 Rotate(uint64 val, int shift) { + // Avoid shifting by 64: doing so yields an undefined result. + return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); +} + +// Equivalent to Rotate(), but requires the second arg to be non-zero. 
+// On x86-64, and probably others, it's possible for this to compile +// to a single instruction if both args are already in registers. +static uint64 RotateByAtLeast1(uint64 val, int shift) { + return (val >> shift) | (val << (64 - shift)); +} + +static uint64 ShiftMix(uint64 val) { + return val ^ (val >> 47); +} + +static uint64 HashLen16(uint64 u, uint64 v) { + return Hash128to64(uint128(u, v)); +} + +static uint64 HashLen0to16(const char *s, size_t len) { + if (len > 8) { + uint64 a = Fetch64(s); + uint64 b = Fetch64(s + len - 8); + return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b; + } + if (len >= 4) { + uint64 a = Fetch32(s); + return HashLen16(len + (a << 3), Fetch32(s + len - 4)); + } + if (len > 0) { + uint8 a = s[0]; + uint8 b = s[len >> 1]; + uint8 c = s[len - 1]; + uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8); + uint32 z = len + (static_cast<uint32>(c) << 2); + return ShiftMix(y * k2 ^ z * k3) * k2; + } + return k2; +} + +// This probably works well for 16-byte strings as well, but it may be overkill +// in that case. +static uint64 HashLen17to32(const char *s, size_t len) { + uint64 a = Fetch64(s) * k1; + uint64 b = Fetch64(s + 8); + uint64 c = Fetch64(s + len - 8) * k2; + uint64 d = Fetch64(s + len - 16) * k0; + return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d, + a + Rotate(b ^ k3, 20) - c + len); +} + +// Return a 16-byte hash for 48 bytes. Quick and dirty. +// Callers do best to use "random-looking" values for a and b. +static pair<uint64, uint64> WeakHashLen32WithSeeds( + uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) { + a += w; + b = Rotate(b + a + z, 21); + uint64 c = a; + a += x; + a += y; + b += Rotate(a, 44); + return make_pair(a + z, b + c); +} + +// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. 
+static pair<uint64, uint64> WeakHashLen32WithSeeds( + const char* s, uint64 a, uint64 b) { + return WeakHashLen32WithSeeds(Fetch64(s), + Fetch64(s + 8), + Fetch64(s + 16), + Fetch64(s + 24), + a, + b); +} + +// Return an 8-byte hash for 33 to 64 bytes. +static uint64 HashLen33to64(const char *s, size_t len) { + uint64 z = Fetch64(s + 24); + uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0; + uint64 b = Rotate(a + z, 52); + uint64 c = Rotate(a, 37); + a += Fetch64(s + 8); + c += Rotate(a, 7); + a += Fetch64(s + 16); + uint64 vf = a + z; + uint64 vs = b + Rotate(a, 31) + c; + a = Fetch64(s + 16) + Fetch64(s + len - 32); + z = Fetch64(s + len - 8); + b = Rotate(a + z, 52); + c = Rotate(a, 37); + a += Fetch64(s + len - 24); + c += Rotate(a, 7); + a += Fetch64(s + len - 16); + uint64 wf = a + z; + uint64 ws = b + Rotate(a, 31) + c; + uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0); + return ShiftMix(r * k0 + vs) * k2; +} + +uint64 CityHash64(const char *s, size_t len) { + if (len <= 32) { + if (len <= 16) { + return HashLen0to16(s, len); + } else { + return HashLen17to32(s, len); + } + } else if (len <= 64) { + return HashLen33to64(s, len); + } + + // For strings over 64 bytes we hash the end first, and then as we + // loop we keep 56 bytes of state: v, w, x, y, and z. + uint64 x = Fetch64(s + len - 40); + uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56); + uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24)); + pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z); + pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x); + x = x * k1 + Fetch64(s); + + // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. 
+ len = (len - 1) & ~static_cast<size_t>(63); + do { + x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = Rotate(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + len -= 64; + } while (len != 0); + return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, + HashLen16(v.second, w.second) + x); +} + +uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) { + return CityHash64WithSeeds(s, len, k2, seed); +} + +uint64 CityHash64WithSeeds(const char *s, size_t len, + uint64 seed0, uint64 seed1) { + return HashLen16(CityHash64(s, len) - seed0, seed1); +} + +// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings +// of any length representable in signed long. Based on City and Murmur. +static uint128 CityMurmur(const char *s, size_t len, uint128 seed) { + uint64 a = Uint128Low64(seed); + uint64 b = Uint128High64(seed); + uint64 c = 0; + uint64 d = 0; + signed long l = len - 16; + if (l <= 0) { // len <= 16 + a = ShiftMix(a * k1) * k1; + c = b * k1 + HashLen0to16(s, len); + d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c)); + } else { // len > 16 + c = HashLen16(Fetch64(s + len - 8) + k1, a); + d = HashLen16(b + len, c + Fetch64(s + len - 16)); + a += d; + do { + a ^= ShiftMix(Fetch64(s) * k1) * k1; + a *= k1; + b ^= a; + c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; + c *= k1; + d ^= c; + s += 16; + l -= 16; + } while (l > 0); + } + a = HashLen16(a, c); + b = HashLen16(d, b); + return uint128(a ^ b, HashLen16(b, a)); +} + +uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) { + if (len < 128) { + return CityMurmur(s, len, seed); + } + + // We expect len >= 128 to be the common case. Keep 56 bytes of state: + // v, w, x, y, and z. 
+ pair<uint64, uint64> v, w; + uint64 x = Uint128Low64(seed); + uint64 y = Uint128High64(seed); + uint64 z = len * k1; + v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s); + v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8); + w.first = Rotate(y + z, 35) * k1 + x; + w.second = Rotate(x + Fetch64(s + 88), 53) * k1; + + // This is the same inner loop as CityHash64(), manually unrolled. + do { + x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = Rotate(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = Rotate(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + len -= 128; + } while (LIKELY(len >= 128)); + x += Rotate(v.first + z, 49) * k0; + z += Rotate(w.first, 37) * k0; + // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. + for (size_t tail_done = 0; tail_done < len; ) { + tail_done += 32; + y = Rotate(x + y, 42) * k0 + v.second; + w.first += Fetch64(s + len - tail_done + 16); + x = x * k0 + w.first; + z += w.second + Fetch64(s + len - tail_done); + w.second += v.first; + v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second); + } + // At this point our 56 bytes of state should contain more than + // enough information for a strong 128-bit hash. We use two + // different 56-byte-to-8-byte hashes to get a 16-byte final result. 
+ x = HashLen16(x, v.first); + y = HashLen16(y + z, w.first); + return uint128(HashLen16(x + v.second, w.second) + y, + HashLen16(x + w.second, y + v.second)); +} + +uint128 CityHash128(const char *s, size_t len) { + if (len >= 16) { + return CityHash128WithSeed(s + 16, + len - 16, + uint128(Fetch64(s) ^ k3, + Fetch64(s + 8))); + } else if (len >= 8) { + return CityHash128WithSeed(NULL, + 0, + uint128(Fetch64(s) ^ (len * k0), + Fetch64(s + len - 8) ^ k1)); + } else { + return CityHash128WithSeed(s, len, uint128(k0, k1)); + } +} + +#ifdef __SSE4_2__ +#include <nmmintrin.h> + +// Requires len >= 240. +static void CityHashCrc256Long(const char *s, size_t len, + uint32 seed, uint64 *result) { + uint64 a = Fetch64(s + 56) + k0; + uint64 b = Fetch64(s + 96) + k0; + uint64 c = result[0] = HashLen16(b, len); + uint64 d = result[1] = Fetch64(s + 120) * k0 + len; + uint64 e = Fetch64(s + 184) + seed; + uint64 f = seed; + uint64 g = 0; + uint64 h = 0; + uint64 i = 0; + uint64 j = 0; + uint64 t = c + d; + + // 240 bytes of input per iter. 
+ size_t iters = len / 240; + len -= iters * 240; + do { +#define CHUNK(multiplier, z) \ + { \ + uint64 old_a = a; \ + a = Rotate(b, 41 ^ z) * multiplier + Fetch64(s); \ + b = Rotate(c, 27 ^ z) * multiplier + Fetch64(s + 8); \ + c = Rotate(d, 41 ^ z) * multiplier + Fetch64(s + 16); \ + d = Rotate(e, 33 ^ z) * multiplier + Fetch64(s + 24); \ + e = Rotate(t, 25 ^ z) * multiplier + Fetch64(s + 32); \ + t = old_a; \ + } \ + f = _mm_crc32_u64(f, a); \ + g = _mm_crc32_u64(g, b); \ + h = _mm_crc32_u64(h, c); \ + i = _mm_crc32_u64(i, d); \ + j = _mm_crc32_u64(j, e); \ + s += 40 + + CHUNK(1, 1); CHUNK(k0, 0); + CHUNK(1, 1); CHUNK(k0, 0); + CHUNK(1, 1); CHUNK(k0, 0); + } while (--iters > 0); + + while (len >= 40) { + CHUNK(k0, 0); + len -= 40; + } + if (len > 0) { + s = s + len - 40; + CHUNK(k0, 0); + } + j += i << 32; + a = HashLen16(a, j); + h += g << 32; + b += h; + c = HashLen16(c, f) + i; + d = HashLen16(d, e + result[0]); + j += e; + i += HashLen16(h, t); + e = HashLen16(a, d) + j; + f = HashLen16(b, c) + a; + g = HashLen16(j, i) + c; + result[0] = e + f + g + h; + a = ShiftMix((a + g) * k0) * k0 + b; + result[1] += a + result[0]; + a = ShiftMix(a * k0) * k0 + c; + result[2] = a + result[1]; + a = ShiftMix((a + e) * k0) * k0; + result[3] = a + result[2]; +} + +// Requires len < 240. 
+static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) { + char buf[240]; + memcpy(buf, s, len); + memset(buf + len, 0, 240 - len); + CityHashCrc256Long(buf, 240, ~static_cast<uint32>(len), result); +} + +void CityHashCrc256(const char *s, size_t len, uint64 *result) { + if (LIKELY(len >= 240)) { + CityHashCrc256Long(s, len, 0, result); + } else { + CityHashCrc256Short(s, len, result); + } +} + +uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) { + if (len <= 900) { + return CityHash128WithSeed(s, len, seed); + } else { + uint64 result[4]; + CityHashCrc256(s, len, result); + uint64 u = Uint128High64(seed) + result[0]; + uint64 v = Uint128Low64(seed) + result[1]; + return uint128(HashLen16(u, v + result[2]), + HashLen16(Rotate(v, 32), u * k0 + result[3])); + } +} + +uint128 CityHashCrc128(const char *s, size_t len) { + if (len <= 900) { + return CityHash128(s, len); + } else { + uint64 result[4]; + CityHashCrc256(s, len, result); + return uint128(result[2], result[3]); + } +} + +#endif @@ -1,97 +1,106 @@ -// Copyright (c) 2011 Google, Inc.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-//
-// CityHash Version 1, by Geoff Pike and Jyrki Alakuijala
-//
-// This file provides a few functions for hashing strings. On x86-64
-// hardware in 2011, CityHash64() is faster than other high-quality
-// hash functions, such as Murmur. This is largely due to higher
-// instruction-level parallelism. CityHash64() and CityHash128() also perform
-// well on hash-quality tests.
-//
-// CityHash128() is optimized for relatively long strings and returns
-// a 128-bit hash. For strings more than about 2000 bytes it can be
-// faster than CityHash64().
-//
-// Functions in the CityHash family are not suitable for cryptography.
-//
-// WARNING: This code has not been tested on big-endian platforms!
-// It is known to work well on little-endian platforms that have a small penalty
-// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
-//
-// By the way, for some hash functions, given strings a and b, the hash
-// of a+b is easily derived from the hashes of a and b. This property
-// doesn't hold for any hash functions in this file.
-
-#ifndef CITY_HASH_H_
-#define CITY_HASH_H_
-
-#if defined(_MSC_VER) || defined(__CYGWIN__)
-#include "pstdint.h"
-typedef int ssize_t;
-#pragma warning(disable:4267)
-#else
-#include <stdint.h>
-#endif
-
-#include <stdlib.h> // for size_t.
-#include <utility>
-
-typedef uint8_t uint8;
-typedef uint32_t uint32;
-typedef uint64_t uint64;
-typedef std::pair<uint64, uint64> uint128;
-
-inline uint64 Uint128Low64(const uint128& x) { return x.first; }
-inline uint64 Uint128High64(const uint128& x) { return x.second; }
-
-// Hash function for a byte array.
-uint64 CityHash64(const char *buf, size_t len);
-
-// Hash function for a byte array. For convenience, a 64-bit seed is also
-// hashed into the result.
-uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
-
-// Hash function for a byte array. For convenience, two seeds are also
-// hashed into the result.
-uint64 CityHash64WithSeeds(const char *buf, size_t len,
- uint64 seed0, uint64 seed1);
-
-// Hash function for a byte array.
-uint128 CityHash128(const char *s, size_t len);
-
-// Hash function for a byte array. For convenience, a 128-bit seed is also
-// hashed into the result.
-uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
-
-// Hash 128 input bits down to 64 bits of output.
-// This is intended to be a reasonably good hash function.
-inline uint64 Hash128to64(const uint128& x) {
- // Murmur-inspired hashing.
- const uint64 kMul = 0x9ddfea08eb382d69ULL;
- uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
- a ^= (a >> 47);
- uint64 b = (Uint128High64(x) ^ a) * kMul;
- b ^= (b >> 47);
- b *= kMul;
- return b;
-}
-
-#endif // CITY_HASH_H_
+// Copyright (c) 2011 Google, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// CityHash, by Geoff Pike and Jyrki Alakuijala +// +// This file provides a few functions for hashing strings. On x86-64 +// hardware in 2011, CityHash64() is faster than other high-quality +// hash functions, such as Murmur. This is largely due to higher +// instruction-level parallelism. CityHash64() and CityHash128() also perform +// well on hash-quality tests. +// +// CityHash128() is optimized for relatively long strings and returns +// a 128-bit hash. For strings more than about 2000 bytes it can be +// faster than CityHash64(). +// +// Functions in the CityHash family are not suitable for cryptography. +// +// WARNING: This code has not been tested on big-endian platforms! +// It is known to work well on little-endian platforms that have a small penalty +// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs. 
+// +// By the way, for some hash functions, given strings a and b, the hash +// of a+b is easily derived from the hashes of a and b. This property +// doesn't hold for any hash functions in this file. + +#ifndef CITY_HASH_H_ +#define CITY_HASH_H_ + +#include <stdlib.h> // for size_t. +#include <stdint.h> +#include <utility> + +typedef uint8_t uint8; +typedef uint32_t uint32; +typedef uint64_t uint64; +typedef std::pair<uint64, uint64> uint128; + +inline uint64 Uint128Low64(const uint128& x) { return x.first; } +inline uint64 Uint128High64(const uint128& x) { return x.second; } + +// Hash function for a byte array. +uint64 CityHash64(const char *buf, size_t len); + +// Hash function for a byte array. For convenience, a 64-bit seed is also +// hashed into the result. +uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed); + +// Hash function for a byte array. For convenience, two seeds are also +// hashed into the result. +uint64 CityHash64WithSeeds(const char *buf, size_t len, + uint64 seed0, uint64 seed1); + +// Hash function for a byte array. +uint128 CityHash128(const char *s, size_t len); + +// Hash function for a byte array. For convenience, a 128-bit seed is also +// hashed into the result. +uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed); + +// Hash 128 input bits down to 64 bits of output. +// This is intended to be a reasonably good hash function. +inline uint64 Hash128to64(const uint128& x) { + // Murmur-inspired hashing. + const uint64 kMul = 0x9ddfea08eb382d69ULL; + uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; + a ^= (a >> 47); + uint64 b = (Uint128High64(x) ^ a) * kMul; + b ^= (b >> 47); + b *= kMul; + return b; +} + +// Conditionally include declarations for versions of City that require SSE4.2 +// instructions to be available. +#ifdef __SSE4_2__ + +// Hash function for a byte array. +uint128 CityHashCrc128(const char *s, size_t len); + +// Hash function for a byte array. 
For convenience, a 128-bit seed is also +// hashed into the result. +uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed); + +// Hash function for a byte array. Sets result[0] ... result[3]. +void CityHashCrc256(const char *s, size_t len, uint64 *result); + +#endif // __SSE4_2__ + +#endif // CITY_HASH_H_ diff --git a/CityTest.cpp b/CityTest.cpp new file mode 100644 index 0000000..4190cc8 --- /dev/null +++ b/CityTest.cpp @@ -0,0 +1,15 @@ +#include "City.h" + +void CityHash64_test ( const void * key, int len, uint32_t seed, void * out ) +{ + *(uint64*)out = CityHash64WithSeed((const char *)key,len,seed); +} + +void CityHash128_test ( const void * key, int len, uint32_t seed, void * out ) +{ + uint128 s(0,0); + + s.first = seed; + + *(uint128*)out = CityHash128WithSeed((const char*)key,len,s); +} diff --git a/DifferentialTest.cpp b/DifferentialTest.cpp index b356085..d9067c9 100644 --- a/DifferentialTest.cpp +++ b/DifferentialTest.cpp @@ -1,3 +1,3 @@ -#include "DifferentialTest.h"
-
-//----------------------------------------------------------------------------
+#include "DifferentialTest.h" + +//---------------------------------------------------------------------------- diff --git a/DifferentialTest.h b/DifferentialTest.h index 3136cbb..824d72e 100644 --- a/DifferentialTest.h +++ b/DifferentialTest.h @@ -1,281 +1,281 @@ -//-----------------------------------------------------------------------------
-// Differential collision & distribution tests - generate a bunch of random keys,
-// see what happens to the hash value when we flip a few bits of the key.
-
-#pragma once
-
-#include "Types.h"
-#include "Stats.h" // for chooseUpToK
-#include "KeysetTest.h" // for SparseKeygenRecurse
-#include "Random.h"
-
-#include <vector>
-#include <algorithm>
-#include <stdio.h>
-
-//-----------------------------------------------------------------------------
-// Sort through the differentials, ignoring collisions that only occured once
-// (these could be false positives). If we find collisions of 3 or more, the
-// differential test fails.
-
-template < class keytype >
-bool ProcessDifferentials ( std::vector<keytype> & diffs, int reps, bool dumpCollisions )
-{
- std::sort(diffs.begin(), diffs.end());
-
- int count = 1;
- int ignore = 0;
-
- bool result = true;
-
- if(diffs.size())
- {
- keytype kp = diffs[0];
-
- for(int i = 1; i < (int)diffs.size(); i++)
- {
- if(diffs[i] == kp)
- {
- count++;
- continue;
- }
- else
- {
- if(count > 1)
- {
- result = false;
-
- double pct = 100 * (double(count) / double(reps));
-
- if(dumpCollisions)
- {
- printbits((unsigned char*)&kp,sizeof(kp));
- printf(" - %4.2f%%\n", pct );
- }
- }
- else
- {
- ignore++;
- }
-
- kp = diffs[i];
- count = 1;
- }
- }
-
- if(count > 1)
- {
- double pct = 100 * (double(count) / double(reps));
-
- if(dumpCollisions)
- {
- printbits((unsigned char*)&kp,sizeof(kp));
- printf(" - %4.2f%%\n", pct );
- }
- }
- else
- {
- ignore++;
- }
- }
-
- printf("%d total collisions, of which %d single collisions were ignored",(int)diffs.size(),ignore);
-
- if(result == false)
- {
- printf(" !!!!! ");
- }
-
- printf("\n");
- printf("\n");
-
- return result;
-}
-
-//-----------------------------------------------------------------------------
-// Check all possible keybits-choose-N differentials for collisions, report
-// ones that occur significantly more often than expected.
-
-// Random collisions can happen with probability 1 in 2^32 - if we do more than
-// 2^32 tests, we'll probably see some spurious random collisions, so don't report
-// them.
-
-template < typename keytype, typename hashtype >
-void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, hashtype & h2, int start, int bitsleft, std::vector<keytype> & diffs )
-{
- const int bits = sizeof(keytype)*8;
-
- for(int i = start; i < bits; i++)
- {
- flipbit(&k2,sizeof(k2),i);
- bitsleft--;
-
- hash(&k2,sizeof(k2),0,&h2);
-
- if(h1 == h2)
- {
- diffs.push_back(k1 ^ k2);
- }
-
- if(bitsleft)
- {
- DiffTestRecurse(hash,k1,k2,h1,h2,i+1,bitsleft,diffs);
- }
-
- flipbit(&k2,sizeof(k2),i);
- bitsleft++;
- }
-}
-
-//----------
-
-template < typename keytype, typename hashtype >
-bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions )
-{
- const int keybits = sizeof(keytype) * 8;
- const int hashbits = sizeof(hashtype) * 8;
-
- double diffcount = chooseUpToK(keybits,diffbits);
- double testcount = (diffcount * double(reps));
- double expected = testcount / pow(2.0,double(hashbits));
-
- Rand r(100);
-
- std::vector<keytype> diffs;
-
- keytype k1,k2;
- hashtype h1,h2;
-
- printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits);
- printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected);
-
- for(int i = 0; i < reps; i++)
- {
- if(i % (reps/10) == 0) printf(".");
-
- r.rand_p(&k1,sizeof(keytype));
- k2 = k1;
-
- hash(&k1,sizeof(k1),0,(uint32_t*)&h1);
-
- DiffTestRecurse<keytype,hashtype>(hash,k1,k2,h1,h2,0,diffbits,diffs);
- }
- printf("\n");
-
- bool result = true;
-
- result &= ProcessDifferentials(diffs,reps,dumpCollisions);
-
- return result;
-}
-
-//-----------------------------------------------------------------------------
-// Differential distribution test - for each N-bit input differential, generate
-// a large set of differential key pairs, hash them, and test the output
-// differentials using our distribution test code.
-
-// This is a very hard test to pass - even if the hash values are well-distributed,
-// the differences between hash values may not be. It's also not entirely relevant
-// for testing hash functions, but it's still interesting.
-
-// This test is a _lot_ of work, as it's essentially a full keyset test for
-// each of a potentially huge number of input differentials. To speed things
-// along, we do only a few distribution tests per keyset instead of the full
-// grid.
-
-// #TODO - put diagram drawing back on
-
-template < typename keytype, typename hashtype >
-void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst, double & avg )
-{
- std::vector<keytype> keys(trials);
- std::vector<hashtype> A(trials),B(trials);
-
- for(int i = 0; i < trials; i++)
- {
- rand_p(&keys[i],sizeof(keytype));
-
- hash(&keys[i],sizeof(keytype),0,(uint32_t*)&A[i]);
- }
-
- //----------
-
- std::vector<keytype> diffs;
-
- keytype temp(0);
-
- SparseKeygenRecurse<keytype>(0,diffbits,true,temp,diffs);
-
- //----------
-
- worst = 0;
- avg = 0;
-
- hashtype h2;
-
- for(size_t j = 0; j < diffs.size(); j++)
- {
- keytype & d = diffs[j];
-
- for(int i = 0; i < trials; i++)
- {
- keytype k2 = keys[i] ^ d;
-
- hash(&k2,sizeof(k2),0,&h2);
-
- B[i] = A[i] ^ h2;
- }
-
- double dworst,davg;
-
- TestDistributionFast(B,dworst,davg);
-
- avg += davg;
- worst = (dworst > worst) ? dworst : worst;
- }
-
- avg /= double(diffs.size());
-}
-
-//-----------------------------------------------------------------------------
-// Simpler differential-distribution test - for all 1-bit differentials,
-// generate random key pairs and run full distribution/collision tests on the
-// hash differentials
-
-template < typename keytype, typename hashtype >
-bool DiffDistTest2 ( pfHash hash )
-{
- Rand r(857374);
-
- int keybits = sizeof(keytype) * 8;
- const int keycount = 256*256*32;
- keytype k;
-
- std::vector<hashtype> hashes(keycount);
- hashtype h1,h2;
-
- bool result = true;
-
- for(int keybit = 0; keybit < keybits; keybit++)
- {
- printf("Testing bit %d\n",keybit);
-
- for(int i = 0; i < keycount; i++)
- {
- r.rand_p(&k,sizeof(keytype));
-
- hash(&k,sizeof(keytype),0,&h1);
- flipbit(&k,sizeof(keytype),keybit);
- hash(&k,sizeof(keytype),0,&h2);
-
- hashes[i] = h1 ^ h2;
- }
-
- result &= TestHashList<hashtype>(hashes,true,true,true);
- printf("\n");
- }
-
- return result;
-}
-
-//----------------------------------------------------------------------------
+//----------------------------------------------------------------------------- +// Differential collision & distribution tests - generate a bunch of random keys, +// see what happens to the hash value when we flip a few bits of the key. + +#pragma once + +#include "Types.h" +#include "Stats.h" // for chooseUpToK +#include "KeysetTest.h" // for SparseKeygenRecurse +#include "Random.h" + +#include <vector> +#include <algorithm> +#include <stdio.h> + +//----------------------------------------------------------------------------- +// Sort through the differentials, ignoring collisions that only occured once +// (these could be false positives). If we find collisions of 3 or more, the +// differential test fails. + +template < class keytype > +bool ProcessDifferentials ( std::vector<keytype> & diffs, int reps, bool dumpCollisions ) +{ + std::sort(diffs.begin(), diffs.end()); + + int count = 1; + int ignore = 0; + + bool result = true; + + if(diffs.size()) + { + keytype kp = diffs[0]; + + for(int i = 1; i < (int)diffs.size(); i++) + { + if(diffs[i] == kp) + { + count++; + continue; + } + else + { + if(count > 1) + { + result = false; + + double pct = 100 * (double(count) / double(reps)); + + if(dumpCollisions) + { + printbits((unsigned char*)&kp,sizeof(kp)); + printf(" - %4.2f%%\n", pct ); + } + } + else + { + ignore++; + } + + kp = diffs[i]; + count = 1; + } + } + + if(count > 1) + { + double pct = 100 * (double(count) / double(reps)); + + if(dumpCollisions) + { + printbits((unsigned char*)&kp,sizeof(kp)); + printf(" - %4.2f%%\n", pct ); + } + } + else + { + ignore++; + } + } + + printf("%d total collisions, of which %d single collisions were ignored",(int)diffs.size(),ignore); + + if(result == false) + { + printf(" !!!!! 
"); + } + + printf("\n"); + printf("\n"); + + return result; +} + +//----------------------------------------------------------------------------- +// Check all possible keybits-choose-N differentials for collisions, report +// ones that occur significantly more often than expected. + +// Random collisions can happen with probability 1 in 2^32 - if we do more than +// 2^32 tests, we'll probably see some spurious random collisions, so don't report +// them. + +template < typename keytype, typename hashtype > +void DiffTestRecurse ( pfHash hash, keytype & k1, keytype & k2, hashtype & h1, hashtype & h2, int start, int bitsleft, std::vector<keytype> & diffs ) +{ + const int bits = sizeof(keytype)*8; + + for(int i = start; i < bits; i++) + { + flipbit(&k2,sizeof(k2),i); + bitsleft--; + + hash(&k2,sizeof(k2),0,&h2); + + if(h1 == h2) + { + diffs.push_back(k1 ^ k2); + } + + if(bitsleft) + { + DiffTestRecurse(hash,k1,k2,h1,h2,i+1,bitsleft,diffs); + } + + flipbit(&k2,sizeof(k2),i); + bitsleft++; + } +} + +//---------- + +template < typename keytype, typename hashtype > +bool DiffTest ( pfHash hash, int diffbits, int reps, bool dumpCollisions ) +{ + const int keybits = sizeof(keytype) * 8; + const int hashbits = sizeof(hashtype) * 8; + + double diffcount = chooseUpToK(keybits,diffbits); + double testcount = (diffcount * double(reps)); + double expected = testcount / pow(2.0,double(hashbits)); + + Rand r(100); + + std::vector<keytype> diffs; + + keytype k1,k2; + hashtype h1,h2; + + printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n",diffcount,diffbits,keybits,hashbits); + printf("%d reps, %0.f total tests, expecting %2.2f random collisions",reps,testcount,expected); + + for(int i = 0; i < reps; i++) + { + if(i % (reps/10) == 0) printf("."); + + r.rand_p(&k1,sizeof(keytype)); + k2 = k1; + + hash(&k1,sizeof(k1),0,(uint32_t*)&h1); + + DiffTestRecurse<keytype,hashtype>(hash,k1,k2,h1,h2,0,diffbits,diffs); + } + printf("\n"); + + bool result = true; + 
+ result &= ProcessDifferentials(diffs,reps,dumpCollisions); + + return result; +} + +//----------------------------------------------------------------------------- +// Differential distribution test - for each N-bit input differential, generate +// a large set of differential key pairs, hash them, and test the output +// differentials using our distribution test code. + +// This is a very hard test to pass - even if the hash values are well-distributed, +// the differences between hash values may not be. It's also not entirely relevant +// for testing hash functions, but it's still interesting. + +// This test is a _lot_ of work, as it's essentially a full keyset test for +// each of a potentially huge number of input differentials. To speed things +// along, we do only a few distribution tests per keyset instead of the full +// grid. + +// #TODO - put diagram drawing back on + +template < typename keytype, typename hashtype > +void DiffDistTest ( pfHash hash, const int diffbits, int trials, double & worst, double & avg ) +{ + std::vector<keytype> keys(trials); + std::vector<hashtype> A(trials),B(trials); + + for(int i = 0; i < trials; i++) + { + rand_p(&keys[i],sizeof(keytype)); + + hash(&keys[i],sizeof(keytype),0,(uint32_t*)&A[i]); + } + + //---------- + + std::vector<keytype> diffs; + + keytype temp(0); + + SparseKeygenRecurse<keytype>(0,diffbits,true,temp,diffs); + + //---------- + + worst = 0; + avg = 0; + + hashtype h2; + + for(size_t j = 0; j < diffs.size(); j++) + { + keytype & d = diffs[j]; + + for(int i = 0; i < trials; i++) + { + keytype k2 = keys[i] ^ d; + + hash(&k2,sizeof(k2),0,&h2); + + B[i] = A[i] ^ h2; + } + + double dworst,davg; + + TestDistributionFast(B,dworst,davg); + + avg += davg; + worst = (dworst > worst) ? 
dworst : worst; + } + + avg /= double(diffs.size()); +} + +//----------------------------------------------------------------------------- +// Simpler differential-distribution test - for all 1-bit differentials, +// generate random key pairs and run full distribution/collision tests on the +// hash differentials + +template < typename keytype, typename hashtype > +bool DiffDistTest2 ( pfHash hash ) +{ + Rand r(857374); + + int keybits = sizeof(keytype) * 8; + const int keycount = 256*256*32; + keytype k; + + std::vector<hashtype> hashes(keycount); + hashtype h1,h2; + + bool result = true; + + for(int keybit = 0; keybit < keybits; keybit++) + { + printf("Testing bit %d\n",keybit); + + for(int i = 0; i < keycount; i++) + { + r.rand_p(&k,sizeof(keytype)); + + hash(&k,sizeof(keytype),0,&h1); + flipbit(&k,sizeof(keytype),keybit); + hash(&k,sizeof(keytype),0,&h2); + + hashes[i] = h1 ^ h2; + } + + result &= TestHashList<hashtype>(hashes,true,true,true); + printf("\n"); + } + + return result; +} + +//---------------------------------------------------------------------------- @@ -1,155 +1,155 @@ -#include "Hashes.h"
-
-#include "Random.h"
-
-
-#include <stdlib.h>
-//#include <stdint.h>
-#include <assert.h>
-//#include <emmintrin.h>
-//#include <xmmintrin.h>
-
-//----------------------------------------------------------------------------
-// fake / bad hashes
-
-void BadHash ( const void * key, int len, uint32_t seed, void * out )
-{
- uint32_t h = seed;
-
- const uint8_t * data = (const uint8_t*)key;
-
- for(int i = 0; i < len; i++)
- {
- h ^= h >> 3;
- h ^= h << 5;
- h ^= data[i];
- }
-
- *(uint32_t*)out = h;
-}
-
-void sumhash ( const void * key, int len, uint32_t seed, void * out )
-{
- uint32_t h = seed;
-
- const uint8_t * data = (const uint8_t*)key;
-
- for(int i = 0; i < len; i++)
- {
- h += data[i];
- }
-
- *(uint32_t*)out = h;
-}
-
-void sumhash32 ( const void * key, int len, uint32_t seed, void * out )
-{
- uint32_t h = seed;
-
- const uint32_t * data = (const uint32_t*)key;
-
- for(int i = 0; i < len/4; i++)
- {
- h += data[i];
- }
-
- *(uint32_t*)out = h;
-}
-
-void DoNothingHash ( const void *, int, uint32_t, void * )
-{
-}
-
-//-----------------------------------------------------------------------------
-// One-byte-at-a-time hash based on Murmur's mix
-
-uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed )
-{
- const uint8_t * data = (const uint8_t*)key;
-
- uint32_t h = seed;
-
- for(int i = 0; i < len; i++)
- {
- h ^= data[i];
- h *= 0x5bd1e995;
- h ^= h >> 15;
- }
-
- return h;
-}
-
-void MurmurOAAT_test ( const void * key, int len, uint32_t seed, void * out )
-{
- *(uint32_t*)out = MurmurOAAT(key,len,seed);
-}
-
-//----------------------------------------------------------------------------
-
-void FNV ( const void * key, int len, uint32_t seed, void * out )
-{
- unsigned int h = seed;
-
- const uint8_t * data = (const uint8_t*)key;
-
- h ^= BIG_CONSTANT(2166136261);
-
- for(int i = 0; i < len; i++)
- {
- h ^= data[i];
- h *= 16777619;
- }
-
- *(uint32_t*)out = h;
-}
-
-//-----------------------------------------------------------------------------
-
-uint32_t x17 ( const void * key, int len, uint32_t h )
-{
- const uint8_t * data = (const uint8_t*)key;
-
- for(int i = 0; i < len; ++i)
- {
- h = 17 * h + (data[i] - ' ');
- }
-
- return h ^ (h >> 16);
-}
-
-//-----------------------------------------------------------------------------
-
-uint32_t Bernstein ( const void * key, int len, uint32_t h )
-{
- const uint8_t * data = (const uint8_t*)key;
-
- for(int i = 0; i < len; ++i)
- {
- h = 33 * h + data[i];
- }
-
- return h;
-}
-
-//-----------------------------------------------------------------------------
-// Crap8 hash from http://www.team5150.com/~andrew/noncryptohashzoo/Crap8.html
-
-uint32_t Crap8( const uint8_t *key, uint32_t len, uint32_t seed ) {
- #define c8fold( a, b, y, z ) { p = (uint32_t)(a) * (uint64_t)(b); y ^= (uint32_t)p; z ^= (uint32_t)(p >> 32); }
- #define c8mix( in ) { h *= m; c8fold( in, m, k, h ); }
-
- const uint32_t m = 0x83d2e73b, n = 0x97e1cc59, *key4 = (const uint32_t *)key;
- uint32_t h = len + seed, k = n + len;
- uint64_t p;
-
- while ( len >= 8 ) { c8mix(key4[0]) c8mix(key4[1]) key4 += 2; len -= 8; }
- if ( len >= 4 ) { c8mix(key4[0]) key4 += 1; len -= 4; }
- if ( len ) { c8mix( key4[0] & ( ( 1 << ( len * 8 ) ) - 1 ) ) }
- c8fold( h ^ k, n, k, k )
- return k;
-}
-
-void Crap8_test ( const void * key, int len, uint32_t seed, void * out )
-{
- *(uint32_t*)out = Crap8((const uint8_t*)key,len,seed);
-}
+#include "Hashes.h" + +#include "Random.h" + + +#include <stdlib.h> +//#include <stdint.h> +#include <assert.h> +//#include <emmintrin.h> +//#include <xmmintrin.h> + +//---------------------------------------------------------------------------- +// fake / bad hashes + +void BadHash ( const void * key, int len, uint32_t seed, void * out ) +{ + uint32_t h = seed; + + const uint8_t * data = (const uint8_t*)key; + + for(int i = 0; i < len; i++) + { + h ^= h >> 3; + h ^= h << 5; + h ^= data[i]; + } + + *(uint32_t*)out = h; +} + +void sumhash ( const void * key, int len, uint32_t seed, void * out ) +{ + uint32_t h = seed; + + const uint8_t * data = (const uint8_t*)key; + + for(int i = 0; i < len; i++) + { + h += data[i]; + } + + *(uint32_t*)out = h; +} + +void sumhash32 ( const void * key, int len, uint32_t seed, void * out ) +{ + uint32_t h = seed; + + const uint32_t * data = (const uint32_t*)key; + + for(int i = 0; i < len/4; i++) + { + h += data[i]; + } + + *(uint32_t*)out = h; +} + +void DoNothingHash ( const void *, int, uint32_t, void * ) +{ +} + +//----------------------------------------------------------------------------- +// One-byte-at-a-time hash based on Murmur's mix + +uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed ) +{ + const uint8_t * data = (const uint8_t*)key; + + uint32_t h = seed; + + for(int i = 0; i < len; i++) + { + h ^= data[i]; + h *= 0x5bd1e995; + h ^= h >> 15; + } + + return h; +} + +void MurmurOAAT_test ( const void * key, int len, uint32_t seed, void * out ) +{ + *(uint32_t*)out = MurmurOAAT(key,len,seed); +} + +//---------------------------------------------------------------------------- + +void FNV ( const void * key, int len, uint32_t seed, void * out ) +{ + unsigned int h = seed; + + const uint8_t * data = (const uint8_t*)key; + + h ^= BIG_CONSTANT(2166136261); + + for(int i = 0; i < len; i++) + { + h ^= data[i]; + h *= 16777619; + } + + *(uint32_t*)out = h; +} + 
+//----------------------------------------------------------------------------- + +uint32_t x17 ( const void * key, int len, uint32_t h ) +{ + const uint8_t * data = (const uint8_t*)key; + + for(int i = 0; i < len; ++i) + { + h = 17 * h + (data[i] - ' '); + } + + return h ^ (h >> 16); +} + +//----------------------------------------------------------------------------- + +void Bernstein ( const void * key, int len, uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + + for(int i = 0; i < len; ++i) + { + seed = 33 * seed + data[i]; + } + + *(uint32_t*)out = seed; +} + +//----------------------------------------------------------------------------- +// Crap8 hash from http://www.team5150.com/~andrew/noncryptohashzoo/Crap8.html + +uint32_t Crap8( const uint8_t *key, uint32_t len, uint32_t seed ) { + #define c8fold( a, b, y, z ) { p = (uint32_t)(a) * (uint64_t)(b); y ^= (uint32_t)p; z ^= (uint32_t)(p >> 32); } + #define c8mix( in ) { h *= m; c8fold( in, m, k, h ); } + + const uint32_t m = 0x83d2e73b, n = 0x97e1cc59, *key4 = (const uint32_t *)key; + uint32_t h = len + seed, k = n + len; + uint64_t p; + + while ( len >= 8 ) { c8mix(key4[0]) c8mix(key4[1]) key4 += 2; len -= 8; } + if ( len >= 4 ) { c8mix(key4[0]) key4 += 1; len -= 4; } + if ( len ) { c8mix( key4[0] & ( ( 1 << ( len * 8 ) ) - 1 ) ) } + c8fold( h ^ k, n, k, k ) + return k; +} + +void Crap8_test ( const void * key, int len, uint32_t seed, void * out ) +{ + *(uint32_t*)out = Crap8((const uint8_t*)key,len,seed); +} @@ -1,73 +1,78 @@ -#pragma once
-
-#include "Types.h"
-
-#include "MurmurHash1.h"
-#include "MurmurHash2.h"
-#include "MurmurHash3.h"
-
-//----------
-// These are _not_ hash functions (even though people tend to use crc32 as one...)
-
-void sumhash ( const void * key, int len, uint32_t seed, void * out );
-void sumhash32 ( const void * key, int len, uint32_t seed, void * out );
-
-void DoNothingHash ( const void * key, int len, uint32_t seed, void * out );
-void crc32 ( const void * key, int len, uint32_t seed, void * out );
-
-void randhash_32 ( const void * key, int len, uint32_t seed, void * out );
-void randhash_64 ( const void * key, int len, uint32_t seed, void * out );
-void randhash_128 ( const void * key, int len, uint32_t seed, void * out );
-
-//----------
-// Cryptographic hashes
-
-void md5_32 ( const void * key, int len, uint32_t seed, void * out );
-void sha1_32a ( const void * key, int len, uint32_t seed, void * out );
-
-//----------
-// General purpose hashes
-
-void FNV ( const void * key, int len, uint32_t seed, void * out );
-void SuperFastHash ( const void * key, int len, uint32_t seed, void * out );
-void lookup3_test ( const void * key, int len, uint32_t seed, void * out );
-void MurmurOAAT_test ( const void * key, int len, uint32_t seed, void * out );
-void Crap8_test ( const void * key, int len, uint32_t seed, void * out );
-void CityHash128_test ( const void * key, int len, uint32_t seed, void * out );
-void CityHash64_test ( const void * key, int len, uint32_t seed, void * out );
-
-uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed );
-
-//----------
-// MurmurHash2
-
-void MurmurHash2_test ( const void * key, int len, uint32_t seed, void * out );
-void MurmurHash2A_test ( const void * key, int len, uint32_t seed, void * out );
-
-//-----------------------------------------------------------------------------
-// Test harnesses for Murmur1/2
-
-inline void MurmurHash1_test ( const void * key, int len, uint32_t seed, void * out )
-{
- *(uint32_t*)out = MurmurHash1(key,len,seed);
-}
-
-inline void MurmurHash2_test ( const void * key, int len, uint32_t seed, void * out )
-{
- *(uint32_t*)out = MurmurHash2(key,len,seed);
-}
-
-inline void MurmurHash2A_test ( const void * key, int len, uint32_t seed, void * out )
-{
- *(uint32_t*)out = MurmurHash2A(key,len,seed);
-}
-
-inline void MurmurHash64A_test ( const void * key, int len, uint32_t seed, void * out )
-{
- *(uint64_t*)out = MurmurHash64A(key,len,seed);
-}
-
-inline void MurmurHash64B_test ( const void * key, int len, uint32_t seed, void * out )
-{
- *(uint64_t*)out = MurmurHash64B(key,len,seed);
-}
\ No newline at end of file +#pragma once + +#include "Types.h" + +#include "MurmurHash1.h" +#include "MurmurHash2.h" +#include "MurmurHash3.h" + +//---------- +// These are _not_ hash functions (even though people tend to use crc32 as one...) + +void sumhash ( const void * key, int len, uint32_t seed, void * out ); +void sumhash32 ( const void * key, int len, uint32_t seed, void * out ); + +void DoNothingHash ( const void * key, int len, uint32_t seed, void * out ); +void crc32 ( const void * key, int len, uint32_t seed, void * out ); + +void randhash_32 ( const void * key, int len, uint32_t seed, void * out ); +void randhash_64 ( const void * key, int len, uint32_t seed, void * out ); +void randhash_128 ( const void * key, int len, uint32_t seed, void * out ); + +//---------- +// Cryptographic hashes + +void md5_32 ( const void * key, int len, uint32_t seed, void * out ); +void sha1_32a ( const void * key, int len, uint32_t seed, void * out ); + +//---------- +// General purpose hashes + +void FNV ( const void * key, int len, uint32_t seed, void * out ); +void Bernstein ( const void * key, int len, uint32_t seed, void * out ); +void SuperFastHash ( const void * key, int len, uint32_t seed, void * out ); +void lookup3_test ( const void * key, int len, uint32_t seed, void * out ); +void MurmurOAAT_test ( const void * key, int len, uint32_t seed, void * out ); +void Crap8_test ( const void * key, int len, uint32_t seed, void * out ); +void CityHash128_test ( const void * key, int len, uint32_t seed, void * out ); +void CityHash64_test ( const void * key, int len, uint32_t seed, void * out ); + +void SpookyHash32_test ( const void * key, int len, uint32_t seed, void * out ); +void SpookyHash64_test ( const void * key, int len, uint32_t seed, void * out ); +void SpookyHash128_test ( const void * key, int len, uint32_t seed, void * out ); + +uint32_t MurmurOAAT ( const void * key, int len, uint32_t seed ); + +//---------- +// MurmurHash2 + +void MurmurHash2_test ( 
const void * key, int len, uint32_t seed, void * out ); +void MurmurHash2A_test ( const void * key, int len, uint32_t seed, void * out ); + +//----------------------------------------------------------------------------- +// Test harnesses for Murmur1/2 + +inline void MurmurHash1_test ( const void * key, int len, uint32_t seed, void * out ) +{ + *(uint32_t*)out = MurmurHash1(key,len,seed); +} + +inline void MurmurHash2_test ( const void * key, int len, uint32_t seed, void * out ) +{ + *(uint32_t*)out = MurmurHash2(key,len,seed); +} + +inline void MurmurHash2A_test ( const void * key, int len, uint32_t seed, void * out ) +{ + *(uint32_t*)out = MurmurHash2A(key,len,seed); +} + +inline void MurmurHash64A_test ( const void * key, int len, uint32_t seed, void * out ) +{ + *(uint64_t*)out = MurmurHash64A(key,len,seed); +} + +inline void MurmurHash64B_test ( const void * key, int len, uint32_t seed, void * out ) +{ + *(uint64_t*)out = MurmurHash64B(key,len,seed); +} diff --git a/KeysetTest.cpp b/KeysetTest.cpp index 5561030..b3b8a4c 100644 --- a/KeysetTest.cpp +++ b/KeysetTest.cpp @@ -1,327 +1,327 @@ -#include "KeysetTest.h"
-
-#include "Platform.h"
-#include "Random.h"
-
-#include <map>
-#include <set>
-
-//-----------------------------------------------------------------------------
-// This should hopefully be a thorough and uambiguous test of whether a hash
-// is correctly implemented on a given platform
-
-bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose )
-{
- const int hashbytes = hashbits / 8;
-
- uint8_t * key = new uint8_t[256];
- uint8_t * hashes = new uint8_t[hashbytes * 256];
- uint8_t * final = new uint8_t[hashbytes];
-
- memset(key,0,256);
- memset(hashes,0,hashbytes*256);
- memset(final,0,hashbytes);
-
- // Hash keys of the form {0}, {0,1}, {0,1,2}... up to N=255,using 256-N as
- // the seed
-
- for(int i = 0; i < 256; i++)
- {
- key[i] = (uint8_t)i;
-
- hash(key,i,256-i,&hashes[i*hashbytes]);
- }
-
- // Then hash the result array
-
- hash(hashes,hashbytes*256,0,final);
-
- // The first four bytes of that hash, interpreted as a little-endian integer, is our
- // verification value
-
- uint32_t verification = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | (final[3] << 24);
-
- delete [] key;
- delete [] hashes;
- delete [] final;
-
- //----------
-
- if(expected != verification)
- {
- if(verbose) printf("Verification value 0x%08X : Failed! (Expected 0x%08x)\n",verification,expected);
- return false;
- }
- else
- {
- if(verbose) printf("Verification value 0x%08X : Passed!\n",verification);
- return true;
- }
-}
-
-//----------------------------------------------------------------------------
-// Basic sanity checks -
-
-// A hash function should not be reading outside the bounds of the key.
-
-// Flipping a bit of a key should, with overwhelmingly high probability,
-// result in a different hash.
-
-// Hashing the same key twice should always produce the same result.
-
-// The memory alignment of the key should not affect the hash result.
-
-bool SanityTest ( pfHash hash, const int hashbits )
-{
- printf("Running sanity check 1");
-
- Rand r(883741);
-
- bool result = true;
-
- const int hashbytes = hashbits/8;
- const int reps = 10;
- const int keymax = 128;
- const int pad = 16;
- const int buflen = keymax + pad*3;
-
- uint8_t * buffer1 = new uint8_t[buflen];
- uint8_t * buffer2 = new uint8_t[buflen];
-
- uint8_t * hash1 = new uint8_t[hashbytes];
- uint8_t * hash2 = new uint8_t[hashbytes];
-
- //----------
-
- for(int irep = 0; irep < reps; irep++)
- {
- if(irep % (reps/10) == 0) printf(".");
-
- for(int len = 4; len <= keymax; len++)
- {
- for(int offset = pad; offset < pad*2; offset++)
- {
- uint8_t * key1 = &buffer1[pad];
- uint8_t * key2 = &buffer2[pad+offset];
-
- r.rand_p(buffer1,buflen);
- r.rand_p(buffer2,buflen);
-
- memcpy(key2,key1,len);
-
- hash(key1,len,0,hash1);
-
- for(int bit = 0; bit < (len * 8); bit++)
- {
- // Flip a bit, hash the key -> we should get a different result.
-
- flipbit(key2,len,bit);
- hash(key2,len,0,hash2);
-
- if(memcmp(hash1,hash2,hashbytes) == 0)
- {
- result = false;
- }
-
- // Flip it back, hash again -> we should get the original result.
-
- flipbit(key2,len,bit);
- hash(key2,len,0,hash2);
-
- if(memcmp(hash1,hash2,hashbytes) != 0)
- {
- result = false;
- }
- }
- }
- }
- }
-
- if(result == false)
- {
- printf("*********FAIL*********\n");
- }
- else
- {
- printf("PASS\n");
- }
-
- delete [] hash1;
- delete [] hash2;
-
- return result;
-}
-
-//----------------------------------------------------------------------------
-// Appending zero bytes to a key should always cause it to produce a different
-// hash value
-
-void AppendedZeroesTest ( pfHash hash, const int hashbits )
-{
- printf("Running sanity check 2");
-
- Rand r(173994);
-
- const int hashbytes = hashbits/8;
-
- for(int rep = 0; rep < 100; rep++)
- {
- if(rep % 10 == 0) printf(".");
-
- unsigned char key[256];
-
- memset(key,0,sizeof(key));
-
- r.rand_p(key,32);
-
- uint32_t h1[16];
- uint32_t h2[16];
-
- memset(h1,0,hashbytes);
- memset(h2,0,hashbytes);
-
- for(int i = 0; i < 32; i++)
- {
- hash(key,32+i,0,h1);
-
- if(memcmp(h1,h2,hashbytes) == 0)
- {
- printf("\n*********FAIL*********\n");
- return;
- }
-
- memcpy(h2,h1,hashbytes);
- }
- }
-
- printf("PASS\n");
-}
-
-//-----------------------------------------------------------------------------
-// Generate all keys of up to N bytes containing two non-zero bytes
-
-void TwoBytesKeygen ( int maxlen, KeyCallback & c )
-{
- //----------
- // Compute # of keys
-
- int keycount = 0;
-
- for(int i = 2; i <= maxlen; i++) keycount += (int)chooseK(i,2);
-
- keycount *= 255*255;
-
- for(int i = 2; i <= maxlen; i++) keycount += i*255;
-
- printf("Keyset 'TwoBytes' - up-to-%d-byte keys, %d total keys\n",maxlen, keycount);
-
- c.reserve(keycount);
-
- //----------
- // Add all keys with one non-zero byte
-
- uint8_t key[256];
-
- memset(key,0,256);
-
- for(int keylen = 2; keylen <= maxlen; keylen++)
- for(int byteA = 0; byteA < keylen; byteA++)
- {
- for(int valA = 1; valA <= 255; valA++)
- {
- key[byteA] = (uint8_t)valA;
-
- c(key,keylen);
- }
-
- key[byteA] = 0;
- }
-
- //----------
- // Add all keys with two non-zero bytes
-
- for(int keylen = 2; keylen <= maxlen; keylen++)
- for(int byteA = 0; byteA < keylen-1; byteA++)
- for(int byteB = byteA+1; byteB < keylen; byteB++)
- {
- for(int valA = 1; valA <= 255; valA++)
- {
- key[byteA] = (uint8_t)valA;
-
- for(int valB = 1; valB <= 255; valB++)
- {
- key[byteB] = (uint8_t)valB;
- c(key,keylen);
- }
-
- key[byteB] = 0;
- }
-
- key[byteA] = 0;
- }
-}
-
-//-----------------------------------------------------------------------------
-
-template< typename hashtype >
-void DumpCollisionMap ( CollisionMap<hashtype,ByteVec> & cmap )
-{
- typedef CollisionMap<hashtype,ByteVec> cmap_t;
-
- for(typename cmap_t::iterator it = cmap.begin(); it != cmap.end(); ++it)
- {
- const hashtype & hash = (*it).first;
-
- printf("Hash - ");
- printbytes(&hash,sizeof(hashtype));
- printf("\n");
-
- std::vector<ByteVec> & keys = (*it).second;
-
- for(int i = 0; i < (int)keys.size(); i++)
- {
- ByteVec & key = keys[i];
-
- printf("Key - ");
- printbytes(&key[0],(int)key.size());
- printf("\n");
- }
- printf("\n");
- }
-
-}
-
-// test code
-
-void ReportCollisions ( pfHash hash )
-{
- printf("Hashing keyset\n");
-
- std::vector<uint128_t> hashes;
-
- HashCallback<uint128_t> c(hash,hashes);
-
- TwoBytesKeygen(20,c);
-
- printf("%d hashes\n",(int)hashes.size());
-
- printf("Finding collisions\n");
-
- HashSet<uint128_t> collisions;
-
- FindCollisions(hashes,collisions,1000);
-
- printf("%d collisions\n",(int)collisions.size());
-
- printf("Mapping collisions\n");
-
- CollisionMap<uint128_t,ByteVec> cmap;
-
- CollisionCallback<uint128_t> c2(hash,collisions,cmap);
-
- TwoBytesKeygen(20,c2);
-
- printf("Dumping collisions\n");
-
- DumpCollisionMap(cmap);
-}
+#include "KeysetTest.h" + +#include "Platform.h" +#include "Random.h" + +#include <map> +#include <set> + +//----------------------------------------------------------------------------- +// This should hopefully be a thorough and uambiguous test of whether a hash +// is correctly implemented on a given platform + +bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose ) +{ + const int hashbytes = hashbits / 8; + + uint8_t * key = new uint8_t[256]; + uint8_t * hashes = new uint8_t[hashbytes * 256]; + uint8_t * final = new uint8_t[hashbytes]; + + memset(key,0,256); + memset(hashes,0,hashbytes*256); + memset(final,0,hashbytes); + + // Hash keys of the form {0}, {0,1}, {0,1,2}... up to N=255,using 256-N as + // the seed + + for(int i = 0; i < 256; i++) + { + key[i] = (uint8_t)i; + + hash(key,i,256-i,&hashes[i*hashbytes]); + } + + // Then hash the result array + + hash(hashes,hashbytes*256,0,final); + + // The first four bytes of that hash, interpreted as a little-endian integer, is our + // verification value + + uint32_t verification = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | (final[3] << 24); + + delete [] key; + delete [] hashes; + delete [] final; + + //---------- + + if(expected != verification) + { + if(verbose) printf("Verification value 0x%08X : Failed! (Expected 0x%08x)\n",verification,expected); + return false; + } + else + { + if(verbose) printf("Verification value 0x%08X : Passed!\n",verification); + return true; + } +} + +//---------------------------------------------------------------------------- +// Basic sanity checks - + +// A hash function should not be reading outside the bounds of the key. + +// Flipping a bit of a key should, with overwhelmingly high probability, +// result in a different hash. + +// Hashing the same key twice should always produce the same result. + +// The memory alignment of the key should not affect the hash result. 
+ +bool SanityTest ( pfHash hash, const int hashbits ) +{ + printf("Running sanity check 1"); + + Rand r(883741); + + bool result = true; + + const int hashbytes = hashbits/8; + const int reps = 10; + const int keymax = 128; + const int pad = 16; + const int buflen = keymax + pad*3; + + uint8_t * buffer1 = new uint8_t[buflen]; + uint8_t * buffer2 = new uint8_t[buflen]; + + uint8_t * hash1 = new uint8_t[hashbytes]; + uint8_t * hash2 = new uint8_t[hashbytes]; + + //---------- + + for(int irep = 0; irep < reps; irep++) + { + if(irep % (reps/10) == 0) printf("."); + + for(int len = 4; len <= keymax; len++) + { + for(int offset = pad; offset < pad*2; offset++) + { + uint8_t * key1 = &buffer1[pad]; + uint8_t * key2 = &buffer2[pad+offset]; + + r.rand_p(buffer1,buflen); + r.rand_p(buffer2,buflen); + + memcpy(key2,key1,len); + + hash(key1,len,0,hash1); + + for(int bit = 0; bit < (len * 8); bit++) + { + // Flip a bit, hash the key -> we should get a different result. + + flipbit(key2,len,bit); + hash(key2,len,0,hash2); + + if(memcmp(hash1,hash2,hashbytes) == 0) + { + result = false; + } + + // Flip it back, hash again -> we should get the original result. 
+ + flipbit(key2,len,bit); + hash(key2,len,0,hash2); + + if(memcmp(hash1,hash2,hashbytes) != 0) + { + result = false; + } + } + } + } + } + + if(result == false) + { + printf("*********FAIL*********\n"); + } + else + { + printf("PASS\n"); + } + + delete [] hash1; + delete [] hash2; + + return result; +} + +//---------------------------------------------------------------------------- +// Appending zero bytes to a key should always cause it to produce a different +// hash value + +void AppendedZeroesTest ( pfHash hash, const int hashbits ) +{ + printf("Running sanity check 2"); + + Rand r(173994); + + const int hashbytes = hashbits/8; + + for(int rep = 0; rep < 100; rep++) + { + if(rep % 10 == 0) printf("."); + + unsigned char key[256]; + + memset(key,0,sizeof(key)); + + r.rand_p(key,32); + + uint32_t h1[16]; + uint32_t h2[16]; + + memset(h1,0,hashbytes); + memset(h2,0,hashbytes); + + for(int i = 0; i < 32; i++) + { + hash(key,32+i,0,h1); + + if(memcmp(h1,h2,hashbytes) == 0) + { + printf("\n*********FAIL*********\n"); + return; + } + + memcpy(h2,h1,hashbytes); + } + } + + printf("PASS\n"); +} + +//----------------------------------------------------------------------------- +// Generate all keys of up to N bytes containing two non-zero bytes + +void TwoBytesKeygen ( int maxlen, KeyCallback & c ) +{ + //---------- + // Compute # of keys + + int keycount = 0; + + for(int i = 2; i <= maxlen; i++) keycount += (int)chooseK(i,2); + + keycount *= 255*255; + + for(int i = 2; i <= maxlen; i++) keycount += i*255; + + printf("Keyset 'TwoBytes' - up-to-%d-byte keys, %d total keys\n",maxlen, keycount); + + c.reserve(keycount); + + //---------- + // Add all keys with one non-zero byte + + uint8_t key[256]; + + memset(key,0,256); + + for(int keylen = 2; keylen <= maxlen; keylen++) + for(int byteA = 0; byteA < keylen; byteA++) + { + for(int valA = 1; valA <= 255; valA++) + { + key[byteA] = (uint8_t)valA; + + c(key,keylen); + } + + key[byteA] = 0; + } + + //---------- + // Add all 
keys with two non-zero bytes + + for(int keylen = 2; keylen <= maxlen; keylen++) + for(int byteA = 0; byteA < keylen-1; byteA++) + for(int byteB = byteA+1; byteB < keylen; byteB++) + { + for(int valA = 1; valA <= 255; valA++) + { + key[byteA] = (uint8_t)valA; + + for(int valB = 1; valB <= 255; valB++) + { + key[byteB] = (uint8_t)valB; + c(key,keylen); + } + + key[byteB] = 0; + } + + key[byteA] = 0; + } +} + +//----------------------------------------------------------------------------- + +template< typename hashtype > +void DumpCollisionMap ( CollisionMap<hashtype,ByteVec> & cmap ) +{ + typedef CollisionMap<hashtype,ByteVec> cmap_t; + + for(typename cmap_t::iterator it = cmap.begin(); it != cmap.end(); ++it) + { + const hashtype & hash = (*it).first; + + printf("Hash - "); + printbytes(&hash,sizeof(hashtype)); + printf("\n"); + + std::vector<ByteVec> & keys = (*it).second; + + for(int i = 0; i < (int)keys.size(); i++) + { + ByteVec & key = keys[i]; + + printf("Key - "); + printbytes(&key[0],(int)key.size()); + printf("\n"); + } + printf("\n"); + } + +} + +// test code + +void ReportCollisions ( pfHash hash ) +{ + printf("Hashing keyset\n"); + + std::vector<uint128_t> hashes; + + HashCallback<uint128_t> c(hash,hashes); + + TwoBytesKeygen(20,c); + + printf("%d hashes\n",(int)hashes.size()); + + printf("Finding collisions\n"); + + HashSet<uint128_t> collisions; + + FindCollisions(hashes,collisions,1000); + + printf("%d collisions\n",(int)collisions.size()); + + printf("Mapping collisions\n"); + + CollisionMap<uint128_t,ByteVec> cmap; + + CollisionCallback<uint128_t> c2(hash,collisions,cmap); + + TwoBytesKeygen(20,c2); + + printf("Dumping collisions\n"); + + DumpCollisionMap(cmap); +} diff --git a/KeysetTest.h b/KeysetTest.h index 55d5d5f..dce54d2 100644 --- a/KeysetTest.h +++ b/KeysetTest.h @@ -1,439 +1,439 @@ -//-----------------------------------------------------------------------------
-// Keyset tests generate various sorts of difficult-to-hash keysets and compare
-// the distribution and collision frequency of the hash results against an
-// ideal random distribution
-
-// The sanity checks are also in this cpp/h
-
-#pragma once
-
-#include "Types.h"
-#include "Stats.h"
-#include "Random.h" // for rand_p
-
-#include <algorithm> // for std::swap
-#include <assert.h>
-
-//-----------------------------------------------------------------------------
-// Sanity tests
-
-bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose );
-bool SanityTest ( pfHash hash, const int hashbits );
-void AppendedZeroesTest ( pfHash hash, const int hashbits );
-
-//-----------------------------------------------------------------------------
-// Keyset 'Combination' - all possible combinations of input blocks
-
-template< typename hashtype >
-void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen,
- uint32_t * blocks, int blockcount,
- pfHash hash, std::vector<hashtype> & hashes )
-{
- if(len == maxlen) return;
-
- for(int i = 0; i < blockcount; i++)
- {
- key[len] = blocks[i];
-
- //if(len == maxlen-1)
- {
- hashtype h;
- hash(key,(len+1) * sizeof(uint32_t),0,&h);
- hashes.push_back(h);
- }
-
- //else
- {
- CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes);
- }
- }
-}
-
-template< typename hashtype >
-bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
-{
- printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);
-
- //----------
-
- std::vector<hashtype> hashes;
-
- uint32_t * key = new uint32_t[maxlen];
-
- CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes);
-
- delete [] key;
-
- printf("%d keys\n",(int)hashes.size());
-
- //----------
-
- bool result = true;
-
- result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
-
- printf("\n");
-
- return result;
-}
-
-//----------------------------------------------------------------------------
-// Keyset 'Permutation' - given a set of 32-bit blocks, generate keys
-// consisting of all possible permutations of those blocks
-
-template< typename hashtype >
-void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes )
-{
- if(k == blockcount-1)
- {
- hashtype h;
-
- hash(blocks,blockcount * sizeof(uint32_t),0,&h);
-
- hashes.push_back(h);
-
- return;
- }
-
- for(int i = k; i < blockcount; i++)
- {
- std::swap(blocks[k],blocks[i]);
-
- PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes);
-
- std::swap(blocks[k],blocks[i]);
- }
-}
-
-template< typename hashtype >
-bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
-{
- printf("Keyset 'Permutation' - %d blocks - ",blockcount);
-
- //----------
-
- std::vector<hashtype> hashes;
-
- PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes);
-
- printf("%d keys\n",(int)hashes.size());
-
- //----------
-
- bool result = true;
-
- result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
-
- printf("\n");
-
- return result;
-}
-
-//-----------------------------------------------------------------------------
-// Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set
-
-template < typename keytype, typename hashtype >
-void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes )
-{
- const int nbytes = sizeof(keytype);
- const int nbits = nbytes * 8;
-
- hashtype h;
-
- for(int i = start; i < nbits; i++)
- {
- flipbit(&k,nbytes,i);
-
- if(inclusive || (bitsleft == 1))
- {
- hash(&k,sizeof(keytype),0,&h);
- hashes.push_back(h);
- }
-
- if(bitsleft > 1)
- {
- SparseKeygenRecurse(hash,i+1,bitsleft-1,inclusive,k,hashes);
- }
-
- flipbit(&k,nbytes,i);
- }
-}
-
-//----------
-
-template < int keybits, typename hashtype >
-bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram )
-{
- printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits);
-
- typedef Blob<keybits> keytype;
-
- std::vector<hashtype> hashes;
-
- keytype k;
- memset(&k,0,sizeof(k));
-
- if(inclusive)
- {
- hashtype h;
-
- hash(&k,sizeof(keytype),0,&h);
-
- hashes.push_back(h);
- }
-
- SparseKeygenRecurse(hash,0,setbits,inclusive,k,hashes);
-
- printf("%d keys\n",(int)hashes.size());
-
- bool result = true;
-
- result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
-
- printf("\n");
-
- return result;
-}
-
-//-----------------------------------------------------------------------------
-// Keyset 'Windows' - for all possible N-bit windows of a K-bit key, generate
-// all possible keys with bits set in that window
-
-template < typename keytype, typename hashtype >
-bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram )
-{
- const int keybits = sizeof(keytype) * 8;
- const int keycount = 1 << windowbits;
-
- std::vector<hashtype> hashes;
- hashes.resize(keycount);
-
- bool result = true;
-
- int testcount = keybits;
-
- printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount);
-
- for(int j = 0; j <= testcount; j++)
- {
- int minbit = j;
-
- keytype key;
-
- for(int i = 0; i < keycount; i++)
- {
- key = i;
- //key = key << minbit;
-
- lrot(&key,sizeof(keytype),minbit);
-
- hash(&key,sizeof(keytype),0,&hashes[i]);
- }
-
- printf("Window at %3d - ",j);
-
- result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram);
-
- //printf("\n");
- }
-
- return result;
-}
-
-//-----------------------------------------------------------------------------
-// Keyset 'Cyclic' - generate keys that consist solely of N repetitions of M
-// bytes.
-
-// (This keyset type is designed to make MurmurHash2 fail)
-
-template < typename hashtype >
-bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram )
-{
- printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount);
-
- Rand r(483723);
-
- std::vector<hashtype> hashes;
- hashes.resize(keycount);
-
- int keyLen = cycleLen * cycleReps;
-
- uint8_t * cycle = new uint8_t[cycleLen + 16];
- uint8_t * key = new uint8_t[keyLen];
-
- //----------
-
- for(int i = 0; i < keycount; i++)
- {
- r.rand_p(cycle,cycleLen);
-
- *(uint32_t*)cycle = f3mix(i ^ 0x746a94f1);
-
- for(int j = 0; j < keyLen; j++)
- {
- key[j] = cycle[j % cycleLen];
- }
-
- hash(key,keyLen,0,&hashes[i]);
- }
-
- //----------
-
- bool result = true;
-
- result &= TestHashList(hashes,true,true,drawDiagram);
- printf("\n");
-
- delete [] cycle;
- delete [] key;
-
- return result;
-}
-
-//-----------------------------------------------------------------------------
-// Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes
-
-void TwoBytesKeygen ( int maxlen, KeyCallback & c );
-
-template < typename hashtype >
-bool TwoBytesTest2 ( pfHash hash, int maxlen, bool drawDiagram )
-{
- std::vector<hashtype> hashes;
-
- HashCallback<hashtype> c(hash,hashes);
-
- TwoBytesKeygen(maxlen,c);
-
- bool result = true;
-
- result &= TestHashList(hashes,true,true,drawDiagram);
- printf("\n");
-
- return result;
-}
-
-//-----------------------------------------------------------------------------
-// Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix",
-// where "core" consists of all possible combinations of the given character
-// set of length N.
-
-template < typename hashtype >
-bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram )
-{
- const int prefixlen = (int)strlen(prefix);
- const int suffixlen = (int)strlen(suffix);
- const int corecount = (int)strlen(coreset);
-
- const int keybytes = prefixlen + corelen + suffixlen;
- const int keycount = (int)pow(double(corecount),double(corelen));
-
- printf("Keyset 'Text' - keys of form \"%s[",prefix);
- for(int i = 0; i < corelen; i++) printf("X");
- printf("]%s\" - %d keys\n",suffix,keycount);
-
- uint8_t * key = new uint8_t[keybytes+1];
-
- key[keybytes] = 0;
-
- memcpy(key,prefix,prefixlen);
- memcpy(key+prefixlen+corelen,suffix,suffixlen);
-
- //----------
-
- std::vector<hashtype> hashes;
- hashes.resize(keycount);
-
- for(int i = 0; i < keycount; i++)
- {
- int t = i;
-
- for(int j = 0; j < corelen; j++)
- {
- key[prefixlen+j] = coreset[t % corecount]; t /= corecount;
- }
-
- hash(key,keybytes,0,&hashes[i]);
- }
-
- //----------
-
- bool result = true;
-
- result &= TestHashList(hashes,true,true,drawDiagram);
-
- printf("\n");
-
- delete [] key;
-
- return result;
-}
-
-//-----------------------------------------------------------------------------
-// Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length
-
-// We reuse one block of empty bytes, otherwise the RAM cost is enormous.
-
-template < typename hashtype >
-bool ZeroKeyTest ( pfHash hash, bool drawDiagram )
-{
- int keycount = 64*1024;
-
- printf("Keyset 'Zeroes' - %d keys\n",keycount);
-
- unsigned char * nullblock = new unsigned char[keycount];
- memset(nullblock,0,keycount);
-
- //----------
-
- std::vector<hashtype> hashes;
-
- hashes.resize(keycount);
-
- for(int i = 0; i < keycount; i++)
- {
- hash(nullblock,i,0,&hashes[i]);
- }
-
- bool result = true;
-
- result &= TestHashList(hashes,true,true,drawDiagram);
-
- printf("\n");
-
- delete [] nullblock;
-
- return result;
-}
-
-//-----------------------------------------------------------------------------
-// Keyset 'Seed' - hash "the quick brown fox..." using different seeds
-
-template < typename hashtype >
-bool SeedTest ( pfHash hash, int keycount, bool drawDiagram )
-{
- printf("Keyset 'Seed' - %d keys\n",keycount);
-
- const char * text = "The quick brown fox jumps over the lazy dog";
- const int len = (int)strlen(text);
-
- //----------
-
- std::vector<hashtype> hashes;
-
- hashes.resize(keycount);
-
- for(int i = 0; i < keycount; i++)
- {
- hash(text,len,i,&hashes[i]);
- }
-
- bool result = true;
-
- result &= TestHashList(hashes,true,true,drawDiagram);
-
- printf("\n");
-
- return result;
-}
-
-//-----------------------------------------------------------------------------
+//----------------------------------------------------------------------------- +// Keyset tests generate various sorts of difficult-to-hash keysets and compare +// the distribution and collision frequency of the hash results against an +// ideal random distribution + +// The sanity checks are also in this cpp/h + +#pragma once + +#include "Types.h" +#include "Stats.h" +#include "Random.h" // for rand_p + +#include <algorithm> // for std::swap +#include <assert.h> + +//----------------------------------------------------------------------------- +// Sanity tests + +bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose ); +bool SanityTest ( pfHash hash, const int hashbits ); +void AppendedZeroesTest ( pfHash hash, const int hashbits ); + +//----------------------------------------------------------------------------- +// Keyset 'Combination' - all possible combinations of input blocks + +template< typename hashtype > +void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen, + uint32_t * blocks, int blockcount, + pfHash hash, std::vector<hashtype> & hashes ) +{ + if(len == maxlen) return; + + for(int i = 0; i < blockcount; i++) + { + key[len] = blocks[i]; + + //if(len == maxlen-1) + { + hashtype h; + hash(key,(len+1) * sizeof(uint32_t),0,&h); + hashes.push_back(h); + } + + //else + { + CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes); + } + } +} + +template< typename hashtype > +bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram ) +{ + printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount); + + //---------- + + std::vector<hashtype> hashes; + + uint32_t * key = new uint32_t[maxlen]; + + CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes); + + delete [] key; + + printf("%d keys\n",(int)hashes.size()); + + //---------- + + bool result = true; + + 
result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram); + + printf("\n"); + + return result; +} + +//---------------------------------------------------------------------------- +// Keyset 'Permutation' - given a set of 32-bit blocks, generate keys +// consisting of all possible permutations of those blocks + +template< typename hashtype > +void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes ) +{ + if(k == blockcount-1) + { + hashtype h; + + hash(blocks,blockcount * sizeof(uint32_t),0,&h); + + hashes.push_back(h); + + return; + } + + for(int i = k; i < blockcount; i++) + { + std::swap(blocks[k],blocks[i]); + + PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes); + + std::swap(blocks[k],blocks[i]); + } +} + +template< typename hashtype > +bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram ) +{ + printf("Keyset 'Permutation' - %d blocks - ",blockcount); + + //---------- + + std::vector<hashtype> hashes; + + PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes); + + printf("%d keys\n",(int)hashes.size()); + + //---------- + + bool result = true; + + result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram); + + printf("\n"); + + return result; +} + +//----------------------------------------------------------------------------- +// Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set + +template < typename keytype, typename hashtype > +void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes ) +{ + const int nbytes = sizeof(keytype); + const int nbits = nbytes * 8; + + hashtype h; + + for(int i = start; i < nbits; i++) + { + flipbit(&k,nbytes,i); + + if(inclusive || (bitsleft == 1)) + { + hash(&k,sizeof(keytype),0,&h); + hashes.push_back(h); + } + + if(bitsleft > 1) + { + 
SparseKeygenRecurse(hash,i+1,bitsleft-1,inclusive,k,hashes); + } + + flipbit(&k,nbytes,i); + } +} + +//---------- + +template < int keybits, typename hashtype > +bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram ) +{ + printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits); + + typedef Blob<keybits> keytype; + + std::vector<hashtype> hashes; + + keytype k; + memset(&k,0,sizeof(k)); + + if(inclusive) + { + hashtype h; + + hash(&k,sizeof(keytype),0,&h); + + hashes.push_back(h); + } + + SparseKeygenRecurse(hash,0,setbits,inclusive,k,hashes); + + printf("%d keys\n",(int)hashes.size()); + + bool result = true; + + result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram); + + printf("\n"); + + return result; +} + +//----------------------------------------------------------------------------- +// Keyset 'Windows' - for all possible N-bit windows of a K-bit key, generate +// all possible keys with bits set in that window + +template < typename keytype, typename hashtype > +bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram ) +{ + const int keybits = sizeof(keytype) * 8; + const int keycount = 1 << windowbits; + + std::vector<hashtype> hashes; + hashes.resize(keycount); + + bool result = true; + + int testcount = keybits; + + printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount); + + for(int j = 0; j <= testcount; j++) + { + int minbit = j; + + keytype key; + + for(int i = 0; i < keycount; i++) + { + key = i; + //key = key << minbit; + + lrot(&key,sizeof(keytype),minbit); + + hash(&key,sizeof(keytype),0,&hashes[i]); + } + + printf("Window at %3d - ",j); + + result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram); + + //printf("\n"); + } + + return result; +} 
+ +//----------------------------------------------------------------------------- +// Keyset 'Cyclic' - generate keys that consist solely of N repetitions of M +// bytes. + +// (This keyset type is designed to make MurmurHash2 fail) + +template < typename hashtype > +bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram ) +{ + printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount); + + Rand r(483723); + + std::vector<hashtype> hashes; + hashes.resize(keycount); + + int keyLen = cycleLen * cycleReps; + + uint8_t * cycle = new uint8_t[cycleLen + 16]; + uint8_t * key = new uint8_t[keyLen]; + + //---------- + + for(int i = 0; i < keycount; i++) + { + r.rand_p(cycle,cycleLen); + + *(uint32_t*)cycle = f3mix(i ^ 0x746a94f1); + + for(int j = 0; j < keyLen; j++) + { + key[j] = cycle[j % cycleLen]; + } + + hash(key,keyLen,0,&hashes[i]); + } + + //---------- + + bool result = true; + + result &= TestHashList(hashes,true,true,drawDiagram); + printf("\n"); + + delete [] cycle; + delete [] key; + + return result; +} + +//----------------------------------------------------------------------------- +// Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes + +void TwoBytesKeygen ( int maxlen, KeyCallback & c ); + +template < typename hashtype > +bool TwoBytesTest2 ( pfHash hash, int maxlen, bool drawDiagram ) +{ + std::vector<hashtype> hashes; + + HashCallback<hashtype> c(hash,hashes); + + TwoBytesKeygen(maxlen,c); + + bool result = true; + + result &= TestHashList(hashes,true,true,drawDiagram); + printf("\n"); + + return result; +} + +//----------------------------------------------------------------------------- +// Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix", +// where "core" consists of all possible combinations of the given character +// set of length N. 
+ +template < typename hashtype > +bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram ) +{ + const int prefixlen = (int)strlen(prefix); + const int suffixlen = (int)strlen(suffix); + const int corecount = (int)strlen(coreset); + + const int keybytes = prefixlen + corelen + suffixlen; + const int keycount = (int)pow(double(corecount),double(corelen)); + + printf("Keyset 'Text' - keys of form \"%s[",prefix); + for(int i = 0; i < corelen; i++) printf("X"); + printf("]%s\" - %d keys\n",suffix,keycount); + + uint8_t * key = new uint8_t[keybytes+1]; + + key[keybytes] = 0; + + memcpy(key,prefix,prefixlen); + memcpy(key+prefixlen+corelen,suffix,suffixlen); + + //---------- + + std::vector<hashtype> hashes; + hashes.resize(keycount); + + for(int i = 0; i < keycount; i++) + { + int t = i; + + for(int j = 0; j < corelen; j++) + { + key[prefixlen+j] = coreset[t % corecount]; t /= corecount; + } + + hash(key,keybytes,0,&hashes[i]); + } + + //---------- + + bool result = true; + + result &= TestHashList(hashes,true,true,drawDiagram); + + printf("\n"); + + delete [] key; + + return result; +} + +//----------------------------------------------------------------------------- +// Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length + +// We reuse one block of empty bytes, otherwise the RAM cost is enormous. 
+ +template < typename hashtype > +bool ZeroKeyTest ( pfHash hash, bool drawDiagram ) +{ + int keycount = 64*1024; + + printf("Keyset 'Zeroes' - %d keys\n",keycount); + + unsigned char * nullblock = new unsigned char[keycount]; + memset(nullblock,0,keycount); + + //---------- + + std::vector<hashtype> hashes; + + hashes.resize(keycount); + + for(int i = 0; i < keycount; i++) + { + hash(nullblock,i,0,&hashes[i]); + } + + bool result = true; + + result &= TestHashList(hashes,true,true,drawDiagram); + + printf("\n"); + + delete [] nullblock; + + return result; +} + +//----------------------------------------------------------------------------- +// Keyset 'Seed' - hash "the quick brown fox..." using different seeds + +template < typename hashtype > +bool SeedTest ( pfHash hash, int keycount, bool drawDiagram ) +{ + printf("Keyset 'Seed' - %d keys\n",keycount); + + const char * text = "The quick brown fox jumps over the lazy dog"; + const int len = (int)strlen(text); + + //---------- + + std::vector<hashtype> hashes; + + hashes.resize(keycount); + + for(int i = 0; i < keycount; i++) + { + hash(text,len,i,&hashes[i]); + } + + bool result = true; + + result &= TestHashList(hashes,true,true,drawDiagram); + + printf("\n"); + + return result; +} + +//----------------------------------------------------------------------------- diff --git a/MurmurHash1.cpp b/MurmurHash1.cpp index b21e9f7..8225566 100644 --- a/MurmurHash1.cpp +++ b/MurmurHash1.cpp @@ -1,174 +1,174 @@ -//-----------------------------------------------------------------------------
-// MurmurHash was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-// Note - This code makes a few assumptions about how your machine behaves -
-
-// 1. We can read a 4-byte value from any address without crashing
-// 2. sizeof(int) == 4
-
-// And it has a few limitations -
-
-// 1. It will not work incrementally.
-// 2. It will not produce the same results on little-endian and big-endian
-// machines.
-
-#include "MurmurHash1.h"
-
-//-----------------------------------------------------------------------------
-
-uint32_t MurmurHash1 ( const void * key, int len, uint32_t seed )
-{
- const unsigned int m = 0xc6a4a793;
-
- const int r = 16;
-
- unsigned int h = seed ^ (len * m);
-
- //----------
-
- const unsigned char * data = (const unsigned char *)key;
-
- while(len >= 4)
- {
- unsigned int k = *(unsigned int *)data;
-
- h += k;
- h *= m;
- h ^= h >> 16;
-
- data += 4;
- len -= 4;
- }
-
- //----------
-
- switch(len)
- {
- case 3:
- h += data[2] << 16;
- case 2:
- h += data[1] << 8;
- case 1:
- h += data[0];
- h *= m;
- h ^= h >> r;
- };
-
- //----------
-
- h *= m;
- h ^= h >> 10;
- h *= m;
- h ^= h >> 17;
-
- return h;
-}
-
-//-----------------------------------------------------------------------------
-// MurmurHash1Aligned, by Austin Appleby
-
-// Same algorithm as MurmurHash1, but only does aligned reads - should be safer
-// on certain platforms.
-
-// Performance should be equal to or better than the simple version.
-
-unsigned int MurmurHash1Aligned ( const void * key, int len, unsigned int seed )
-{
- const unsigned int m = 0xc6a4a793;
- const int r = 16;
-
- const unsigned char * data = (const unsigned char *)key;
-
- unsigned int h = seed ^ (len * m);
-
- int align = (uint64_t)data & 3;
-
- if(align && (len >= 4))
- {
- // Pre-load the temp registers
-
- unsigned int t = 0, d = 0;
-
- switch(align)
- {
- case 1: t |= data[2] << 16;
- case 2: t |= data[1] << 8;
- case 3: t |= data[0];
- }
-
- t <<= (8 * align);
-
- data += 4-align;
- len -= 4-align;
-
- int sl = 8 * (4-align);
- int sr = 8 * align;
-
- // Mix
-
- while(len >= 4)
- {
- d = *(unsigned int *)data;
- t = (t >> sr) | (d << sl);
- h += t;
- h *= m;
- h ^= h >> r;
- t = d;
-
- data += 4;
- len -= 4;
- }
-
- // Handle leftover data in temp registers
-
- int pack = len < align ? len : align;
-
- d = 0;
-
- switch(pack)
- {
- case 3: d |= data[2] << 16;
- case 2: d |= data[1] << 8;
- case 1: d |= data[0];
- case 0: h += (t >> sr) | (d << sl);
- h *= m;
- h ^= h >> r;
- }
-
- data += pack;
- len -= pack;
- }
- else
- {
- while(len >= 4)
- {
- h += *(unsigned int *)data;
- h *= m;
- h ^= h >> r;
-
- data += 4;
- len -= 4;
- }
- }
-
- //----------
- // Handle tail bytes
-
- switch(len)
- {
- case 3: h += data[2] << 16;
- case 2: h += data[1] << 8;
- case 1: h += data[0];
- h *= m;
- h ^= h >> r;
- };
-
- h *= m;
- h ^= h >> 10;
- h *= m;
- h ^= h >> 17;
-
- return h;
-}
-
+//----------------------------------------------------------------------------- +// MurmurHash was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +// Note - This code makes a few assumptions about how your machine behaves - + +// 1. We can read a 4-byte value from any address without crashing +// 2. sizeof(int) == 4 + +// And it has a few limitations - + +// 1. It will not work incrementally. +// 2. It will not produce the same results on little-endian and big-endian +// machines. + +#include "MurmurHash1.h" + +//----------------------------------------------------------------------------- + +uint32_t MurmurHash1 ( const void * key, int len, uint32_t seed ) +{ + const unsigned int m = 0xc6a4a793; + + const int r = 16; + + unsigned int h = seed ^ (len * m); + + //---------- + + const unsigned char * data = (const unsigned char *)key; + + while(len >= 4) + { + unsigned int k = *(unsigned int *)data; + + h += k; + h *= m; + h ^= h >> 16; + + data += 4; + len -= 4; + } + + //---------- + + switch(len) + { + case 3: + h += data[2] << 16; + case 2: + h += data[1] << 8; + case 1: + h += data[0]; + h *= m; + h ^= h >> r; + }; + + //---------- + + h *= m; + h ^= h >> 10; + h *= m; + h ^= h >> 17; + + return h; +} + +//----------------------------------------------------------------------------- +// MurmurHash1Aligned, by Austin Appleby + +// Same algorithm as MurmurHash1, but only does aligned reads - should be safer +// on certain platforms. + +// Performance should be equal to or better than the simple version. 
+ +unsigned int MurmurHash1Aligned ( const void * key, int len, unsigned int seed ) +{ + const unsigned int m = 0xc6a4a793; + const int r = 16; + + const unsigned char * data = (const unsigned char *)key; + + unsigned int h = seed ^ (len * m); + + int align = (uint64_t)data & 3; + + if(align && (len >= 4)) + { + // Pre-load the temp registers + + unsigned int t = 0, d = 0; + + switch(align) + { + case 1: t |= data[2] << 16; + case 2: t |= data[1] << 8; + case 3: t |= data[0]; + } + + t <<= (8 * align); + + data += 4-align; + len -= 4-align; + + int sl = 8 * (4-align); + int sr = 8 * align; + + // Mix + + while(len >= 4) + { + d = *(unsigned int *)data; + t = (t >> sr) | (d << sl); + h += t; + h *= m; + h ^= h >> r; + t = d; + + data += 4; + len -= 4; + } + + // Handle leftover data in temp registers + + int pack = len < align ? len : align; + + d = 0; + + switch(pack) + { + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + case 0: h += (t >> sr) | (d << sl); + h *= m; + h ^= h >> r; + } + + data += pack; + len -= pack; + } + else + { + while(len >= 4) + { + h += *(unsigned int *)data; + h *= m; + h ^= h >> r; + + data += 4; + len -= 4; + } + } + + //---------- + // Handle tail bytes + + switch(len) + { + case 3: h += data[2] << 16; + case 2: h += data[1] << 8; + case 1: h += data[0]; + h *= m; + h ^= h >> r; + }; + + h *= m; + h ^= h >> 10; + h *= m; + h ^= h >> 17; + + return h; +} + diff --git a/MurmurHash1.h b/MurmurHash1.h index 40ddbc4..93b08c3 100644 --- a/MurmurHash1.h +++ b/MurmurHash1.h @@ -1,34 +1,34 @@ -//-----------------------------------------------------------------------------
-// MurmurHash1 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-#ifndef _MURMURHASH1_H_
-#define _MURMURHASH1_H_
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-typedef unsigned char uint8_t;
-typedef unsigned long uint32_t;
-typedef unsigned __int64 uint64_t;
-
-// Other compilers
-
-#else // defined(_MSC_VER)
-
-#include <stdint.h>
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-
-uint32_t MurmurHash1 ( const void * key, int len, uint32_t seed );
-uint32_t MurmurHash1Aligned ( const void * key, int len, uint32_t seed );
-
-//-----------------------------------------------------------------------------
-
-#endif // _MURMURHASH1_H_
+//----------------------------------------------------------------------------- +// MurmurHash1 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +#ifndef _MURMURHASH1_H_ +#define _MURMURHASH1_H_ + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +typedef unsigned char uint8_t; +typedef unsigned long uint32_t; +typedef unsigned __int64 uint64_t; + +// Other compilers + +#else // defined(_MSC_VER) + +#include <stdint.h> + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +uint32_t MurmurHash1 ( const void * key, int len, uint32_t seed ); +uint32_t MurmurHash1Aligned ( const void * key, int len, uint32_t seed ); + +//----------------------------------------------------------------------------- + +#endif // _MURMURHASH1_H_ diff --git a/MurmurHash2.cpp b/MurmurHash2.cpp index dbb2053..cd1e53a 100644 --- a/MurmurHash2.cpp +++ b/MurmurHash2.cpp @@ -1,523 +1,523 @@ -//-----------------------------------------------------------------------------
-// MurmurHash2 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-// Note - This code makes a few assumptions about how your machine behaves -
-
-// 1. We can read a 4-byte value from any address without crashing
-// 2. sizeof(int) == 4
-
-// And it has a few limitations -
-
-// 1. It will not work incrementally.
-// 2. It will not produce the same results on little-endian and big-endian
-// machines.
-
-#include "MurmurHash2.h"
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-#define BIG_CONSTANT(x) (x)
-
-// Other compilers
-
-#else // defined(_MSC_VER)
-
-#define BIG_CONSTANT(x) (x##LLU)
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-
-uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
-{
- // 'm' and 'r' are mixing constants generated offline.
- // They're not really 'magic', they just happen to work well.
-
- const uint32_t m = 0x5bd1e995;
- const int r = 24;
-
- // Initialize the hash to a 'random' value
-
- uint32_t h = seed ^ len;
-
- // Mix 4 bytes at a time into the hash
-
- const unsigned char * data = (const unsigned char *)key;
-
- while(len >= 4)
- {
- uint32_t k = *(uint32_t*)data;
-
- k *= m;
- k ^= k >> r;
- k *= m;
-
- h *= m;
- h ^= k;
-
- data += 4;
- len -= 4;
- }
-
- // Handle the last few bytes of the input array
-
- switch(len)
- {
- case 3: h ^= data[2] << 16;
- case 2: h ^= data[1] << 8;
- case 1: h ^= data[0];
- h *= m;
- };
-
- // Do a few final mixes of the hash to ensure the last few
- // bytes are well-incorporated.
-
- h ^= h >> 13;
- h *= m;
- h ^= h >> 15;
-
- return h;
-}
-
-//-----------------------------------------------------------------------------
-// MurmurHash2, 64-bit versions, by Austin Appleby
-
-// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
-// and endian-ness issues if used across multiple platforms.
-
-// 64-bit hash for 64-bit platforms
-
-uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )
-{
- const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
- const int r = 47;
-
- uint64_t h = seed ^ (len * m);
-
- const uint64_t * data = (const uint64_t *)key;
- const uint64_t * end = data + (len/8);
-
- while(data != end)
- {
- uint64_t k = *data++;
-
- k *= m;
- k ^= k >> r;
- k *= m;
-
- h ^= k;
- h *= m;
- }
-
- const unsigned char * data2 = (const unsigned char*)data;
-
- switch(len & 7)
- {
- case 7: h ^= uint64_t(data2[6]) << 48;
- case 6: h ^= uint64_t(data2[5]) << 40;
- case 5: h ^= uint64_t(data2[4]) << 32;
- case 4: h ^= uint64_t(data2[3]) << 24;
- case 3: h ^= uint64_t(data2[2]) << 16;
- case 2: h ^= uint64_t(data2[1]) << 8;
- case 1: h ^= uint64_t(data2[0]);
- h *= m;
- };
-
- h ^= h >> r;
- h *= m;
- h ^= h >> r;
-
- return h;
-}
-
-
-// 64-bit hash for 32-bit platforms
-
-uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )
-{
- const uint32_t m = 0x5bd1e995;
- const int r = 24;
-
- uint32_t h1 = uint32_t(seed) ^ len;
- uint32_t h2 = uint32_t(seed >> 32);
-
- const uint32_t * data = (const uint32_t *)key;
-
- while(len >= 8)
- {
- uint32_t k1 = *data++;
- k1 *= m; k1 ^= k1 >> r; k1 *= m;
- h1 *= m; h1 ^= k1;
- len -= 4;
-
- uint32_t k2 = *data++;
- k2 *= m; k2 ^= k2 >> r; k2 *= m;
- h2 *= m; h2 ^= k2;
- len -= 4;
- }
-
- if(len >= 4)
- {
- uint32_t k1 = *data++;
- k1 *= m; k1 ^= k1 >> r; k1 *= m;
- h1 *= m; h1 ^= k1;
- len -= 4;
- }
-
- switch(len)
- {
- case 3: h2 ^= ((unsigned char*)data)[2] << 16;
- case 2: h2 ^= ((unsigned char*)data)[1] << 8;
- case 1: h2 ^= ((unsigned char*)data)[0];
- h2 *= m;
- };
-
- h1 ^= h2 >> 18; h1 *= m;
- h2 ^= h1 >> 22; h2 *= m;
- h1 ^= h2 >> 17; h1 *= m;
- h2 ^= h1 >> 19; h2 *= m;
-
- uint64_t h = h1;
-
- h = (h << 32) | h2;
-
- return h;
-}
-
-//-----------------------------------------------------------------------------
-// MurmurHash2A, by Austin Appleby
-
-// This is a variant of MurmurHash2 modified to use the Merkle-Damgard
-// construction. Bulk speed should be identical to Murmur2, small-key speed
-// will be 10%-20% slower due to the added overhead at the end of the hash.
-
-// This variant fixes a minor issue where null keys were more likely to
-// collide with each other than expected, and also makes the function
-// more amenable to incremental implementations.
-
-#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
-
-uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )
-{
- const uint32_t m = 0x5bd1e995;
- const int r = 24;
- uint32_t l = len;
-
- const unsigned char * data = (const unsigned char *)key;
-
- uint32_t h = seed;
-
- while(len >= 4)
- {
- uint32_t k = *(uint32_t*)data;
-
- mmix(h,k);
-
- data += 4;
- len -= 4;
- }
-
- uint32_t t = 0;
-
- switch(len)
- {
- case 3: t ^= data[2] << 16;
- case 2: t ^= data[1] << 8;
- case 1: t ^= data[0];
- };
-
- mmix(h,t);
- mmix(h,l);
-
- h ^= h >> 13;
- h *= m;
- h ^= h >> 15;
-
- return h;
-}
-
-//-----------------------------------------------------------------------------
-// CMurmurHash2A, by Austin Appleby
-
-// This is a sample implementation of MurmurHash2A designed to work
-// incrementally.
-
-// Usage -
-
-// CMurmurHash2A hasher
-// hasher.Begin(seed);
-// hasher.Add(data1,size1);
-// hasher.Add(data2,size2);
-// ...
-// hasher.Add(dataN,sizeN);
-// uint32_t hash = hasher.End()
-
-class CMurmurHash2A
-{
-public:
-
- void Begin ( uint32_t seed = 0 )
- {
- m_hash = seed;
- m_tail = 0;
- m_count = 0;
- m_size = 0;
- }
-
- void Add ( const unsigned char * data, int len )
- {
- m_size += len;
-
- MixTail(data,len);
-
- while(len >= 4)
- {
- uint32_t k = *(uint32_t*)data;
-
- mmix(m_hash,k);
-
- data += 4;
- len -= 4;
- }
-
- MixTail(data,len);
- }
-
- uint32_t End ( void )
- {
- mmix(m_hash,m_tail);
- mmix(m_hash,m_size);
-
- m_hash ^= m_hash >> 13;
- m_hash *= m;
- m_hash ^= m_hash >> 15;
-
- return m_hash;
- }
-
-private:
-
- static const uint32_t m = 0x5bd1e995;
- static const int r = 24;
-
- void MixTail ( const unsigned char * & data, int & len )
- {
- while( len && ((len<4) || m_count) )
- {
- m_tail |= (*data++) << (m_count * 8);
-
- m_count++;
- len--;
-
- if(m_count == 4)
- {
- mmix(m_hash,m_tail);
- m_tail = 0;
- m_count = 0;
- }
- }
- }
-
- uint32_t m_hash;
- uint32_t m_tail;
- uint32_t m_count;
- uint32_t m_size;
-};
-
-//-----------------------------------------------------------------------------
-// MurmurHashNeutral2, by Austin Appleby
-
-// Same as MurmurHash2, but endian- and alignment-neutral.
-// Half the speed though, alas.
-
-uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
-{
- const uint32_t m = 0x5bd1e995;
- const int r = 24;
-
- uint32_t h = seed ^ len;
-
- const unsigned char * data = (const unsigned char *)key;
-
- while(len >= 4)
- {
- uint32_t k;
-
- k = data[0];
- k |= data[1] << 8;
- k |= data[2] << 16;
- k |= data[3] << 24;
-
- k *= m;
- k ^= k >> r;
- k *= m;
-
- h *= m;
- h ^= k;
-
- data += 4;
- len -= 4;
- }
-
- switch(len)
- {
- case 3: h ^= data[2] << 16;
- case 2: h ^= data[1] << 8;
- case 1: h ^= data[0];
- h *= m;
- };
-
- h ^= h >> 13;
- h *= m;
- h ^= h >> 15;
-
- return h;
-}
-
-//-----------------------------------------------------------------------------
-// MurmurHashAligned2, by Austin Appleby
-
-// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
-// on certain platforms.
-
-// Performance will be lower than MurmurHash2
-
-#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
-
-
-uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )
-{
- const uint32_t m = 0x5bd1e995;
- const int r = 24;
-
- const unsigned char * data = (const unsigned char *)key;
-
- uint32_t h = seed ^ len;
-
- int align = (uint64_t)data & 3;
-
- if(align && (len >= 4))
- {
- // Pre-load the temp registers
-
- uint32_t t = 0, d = 0;
-
- switch(align)
- {
- case 1: t |= data[2] << 16;
- case 2: t |= data[1] << 8;
- case 3: t |= data[0];
- }
-
- t <<= (8 * align);
-
- data += 4-align;
- len -= 4-align;
-
- int sl = 8 * (4-align);
- int sr = 8 * align;
-
- // Mix
-
- while(len >= 4)
- {
- d = *(uint32_t *)data;
- t = (t >> sr) | (d << sl);
-
- uint32_t k = t;
-
- MIX(h,k,m);
-
- t = d;
-
- data += 4;
- len -= 4;
- }
-
- // Handle leftover data in temp registers
-
- d = 0;
-
- if(len >= align)
- {
- switch(align)
- {
- case 3: d |= data[2] << 16;
- case 2: d |= data[1] << 8;
- case 1: d |= data[0];
- }
-
- uint32_t k = (t >> sr) | (d << sl);
- MIX(h,k,m);
-
- data += align;
- len -= align;
-
- //----------
- // Handle tail bytes
-
- switch(len)
- {
- case 3: h ^= data[2] << 16;
- case 2: h ^= data[1] << 8;
- case 1: h ^= data[0];
- h *= m;
- };
- }
- else
- {
- switch(len)
- {
- case 3: d |= data[2] << 16;
- case 2: d |= data[1] << 8;
- case 1: d |= data[0];
- case 0: h ^= (t >> sr) | (d << sl);
- h *= m;
- }
- }
-
- h ^= h >> 13;
- h *= m;
- h ^= h >> 15;
-
- return h;
- }
- else
- {
- while(len >= 4)
- {
- uint32_t k = *(uint32_t *)data;
-
- MIX(h,k,m);
-
- data += 4;
- len -= 4;
- }
-
- //----------
- // Handle tail bytes
-
- switch(len)
- {
- case 3: h ^= data[2] << 16;
- case 2: h ^= data[1] << 8;
- case 1: h ^= data[0];
- h *= m;
- };
-
- h ^= h >> 13;
- h *= m;
- h ^= h >> 15;
-
- return h;
- }
-}
-
-//-----------------------------------------------------------------------------
-
+//----------------------------------------------------------------------------- +// MurmurHash2 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +// Note - This code makes a few assumptions about how your machine behaves - + +// 1. We can read a 4-byte value from any address without crashing +// 2. sizeof(int) == 4 + +// And it has a few limitations - + +// 1. It will not work incrementally. +// 2. It will not produce the same results on little-endian and big-endian +// machines. + +#include "MurmurHash2.h" + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +#define BIG_CONSTANT(x) (x) + +// Other compilers + +#else // defined(_MSC_VER) + +#define BIG_CONSTANT(x) (x##LLU) + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed ) +{ + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + + const uint32_t m = 0x5bd1e995; + const int r = 24; + + // Initialize the hash to a 'random' value + + uint32_t h = seed ^ len; + + // Mix 4 bytes at a time into the hash + + const unsigned char * data = (const unsigned char *)key; + + while(len >= 4) + { + uint32_t k = *(uint32_t*)data; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + + switch(len) + { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. 
+ + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + +//----------------------------------------------------------------------------- +// MurmurHash2, 64-bit versions, by Austin Appleby + +// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment +// and endian-ness issues if used across multiple platforms. + +// 64-bit hash for 64-bit platforms + +uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed ) +{ + const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995); + const int r = 47; + + uint64_t h = seed ^ (len * m); + + const uint64_t * data = (const uint64_t *)key; + const uint64_t * end = data + (len/8); + + while(data != end) + { + uint64_t k = *data++; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + const unsigned char * data2 = (const unsigned char*)data; + + switch(len & 7) + { + case 7: h ^= uint64_t(data2[6]) << 48; + case 6: h ^= uint64_t(data2[5]) << 40; + case 5: h ^= uint64_t(data2[4]) << 32; + case 4: h ^= uint64_t(data2[3]) << 24; + case 3: h ^= uint64_t(data2[2]) << 16; + case 2: h ^= uint64_t(data2[1]) << 8; + case 1: h ^= uint64_t(data2[0]); + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +} + + +// 64-bit hash for 32-bit platforms + +uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed ) +{ + const uint32_t m = 0x5bd1e995; + const int r = 24; + + uint32_t h1 = uint32_t(seed) ^ len; + uint32_t h2 = uint32_t(seed >> 32); + + const uint32_t * data = (const uint32_t *)key; + + while(len >= 8) + { + uint32_t k1 = *data++; + k1 *= m; k1 ^= k1 >> r; k1 *= m; + h1 *= m; h1 ^= k1; + len -= 4; + + uint32_t k2 = *data++; + k2 *= m; k2 ^= k2 >> r; k2 *= m; + h2 *= m; h2 ^= k2; + len -= 4; + } + + if(len >= 4) + { + uint32_t k1 = *data++; + k1 *= m; k1 ^= k1 >> r; k1 *= m; + h1 *= m; h1 ^= k1; + len -= 4; + } + + switch(len) + { + case 3: h2 ^= ((unsigned char*)data)[2] << 16; + case 2: h2 ^= ((unsigned char*)data)[1] << 8; + case 1: h2 ^= ((unsigned char*)data)[0]; + 
h2 *= m; + }; + + h1 ^= h2 >> 18; h1 *= m; + h2 ^= h1 >> 22; h2 *= m; + h1 ^= h2 >> 17; h1 *= m; + h2 ^= h1 >> 19; h2 *= m; + + uint64_t h = h1; + + h = (h << 32) | h2; + + return h; +} + +//----------------------------------------------------------------------------- +// MurmurHash2A, by Austin Appleby + +// This is a variant of MurmurHash2 modified to use the Merkle-Damgard +// construction. Bulk speed should be identical to Murmur2, small-key speed +// will be 10%-20% slower due to the added overhead at the end of the hash. + +// This variant fixes a minor issue where null keys were more likely to +// collide with each other than expected, and also makes the function +// more amenable to incremental implementations. + +#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } + +uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed ) +{ + const uint32_t m = 0x5bd1e995; + const int r = 24; + uint32_t l = len; + + const unsigned char * data = (const unsigned char *)key; + + uint32_t h = seed; + + while(len >= 4) + { + uint32_t k = *(uint32_t*)data; + + mmix(h,k); + + data += 4; + len -= 4; + } + + uint32_t t = 0; + + switch(len) + { + case 3: t ^= data[2] << 16; + case 2: t ^= data[1] << 8; + case 1: t ^= data[0]; + }; + + mmix(h,t); + mmix(h,l); + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + +//----------------------------------------------------------------------------- +// CMurmurHash2A, by Austin Appleby + +// This is a sample implementation of MurmurHash2A designed to work +// incrementally. + +// Usage - + +// CMurmurHash2A hasher +// hasher.Begin(seed); +// hasher.Add(data1,size1); +// hasher.Add(data2,size2); +// ... 
+// hasher.Add(dataN,sizeN); +// uint32_t hash = hasher.End() + +class CMurmurHash2A +{ +public: + + void Begin ( uint32_t seed = 0 ) + { + m_hash = seed; + m_tail = 0; + m_count = 0; + m_size = 0; + } + + void Add ( const unsigned char * data, int len ) + { + m_size += len; + + MixTail(data,len); + + while(len >= 4) + { + uint32_t k = *(uint32_t*)data; + + mmix(m_hash,k); + + data += 4; + len -= 4; + } + + MixTail(data,len); + } + + uint32_t End ( void ) + { + mmix(m_hash,m_tail); + mmix(m_hash,m_size); + + m_hash ^= m_hash >> 13; + m_hash *= m; + m_hash ^= m_hash >> 15; + + return m_hash; + } + +private: + + static const uint32_t m = 0x5bd1e995; + static const int r = 24; + + void MixTail ( const unsigned char * & data, int & len ) + { + while( len && ((len<4) || m_count) ) + { + m_tail |= (*data++) << (m_count * 8); + + m_count++; + len--; + + if(m_count == 4) + { + mmix(m_hash,m_tail); + m_tail = 0; + m_count = 0; + } + } + } + + uint32_t m_hash; + uint32_t m_tail; + uint32_t m_count; + uint32_t m_size; +}; + +//----------------------------------------------------------------------------- +// MurmurHashNeutral2, by Austin Appleby + +// Same as MurmurHash2, but endian- and alignment-neutral. +// Half the speed though, alas. 
+ +uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed ) +{ + const uint32_t m = 0x5bd1e995; + const int r = 24; + + uint32_t h = seed ^ len; + + const unsigned char * data = (const unsigned char *)key; + + while(len >= 4) + { + uint32_t k; + + k = data[0]; + k |= data[1] << 8; + k |= data[2] << 16; + k |= data[3] << 24; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + switch(len) + { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + +//----------------------------------------------------------------------------- +// MurmurHashAligned2, by Austin Appleby + +// Same algorithm as MurmurHash2, but only does aligned reads - should be safer +// on certain platforms. + +// Performance will be lower than MurmurHash2 + +#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } + + +uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed ) +{ + const uint32_t m = 0x5bd1e995; + const int r = 24; + + const unsigned char * data = (const unsigned char *)key; + + uint32_t h = seed ^ len; + + int align = (uint64_t)data & 3; + + if(align && (len >= 4)) + { + // Pre-load the temp registers + + uint32_t t = 0, d = 0; + + switch(align) + { + case 1: t |= data[2] << 16; + case 2: t |= data[1] << 8; + case 3: t |= data[0]; + } + + t <<= (8 * align); + + data += 4-align; + len -= 4-align; + + int sl = 8 * (4-align); + int sr = 8 * align; + + // Mix + + while(len >= 4) + { + d = *(uint32_t *)data; + t = (t >> sr) | (d << sl); + + uint32_t k = t; + + MIX(h,k,m); + + t = d; + + data += 4; + len -= 4; + } + + // Handle leftover data in temp registers + + d = 0; + + if(len >= align) + { + switch(align) + { + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + } + + uint32_t k = (t >> sr) | (d << sl); + MIX(h,k,m); + + data += align; + len -= align; + + //---------- 
+ // Handle tail bytes + + switch(len) + { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + } + else + { + switch(len) + { + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + case 0: h ^= (t >> sr) | (d << sl); + h *= m; + } + } + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } + else + { + while(len >= 4) + { + uint32_t k = *(uint32_t *)data; + + MIX(h,k,m); + + data += 4; + len -= 4; + } + + //---------- + // Handle tail bytes + + switch(len) + { + case 3: h ^= data[2] << 16; + case 2: h ^= data[1] << 8; + case 1: h ^= data[0]; + h *= m; + }; + + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; + } +} + +//----------------------------------------------------------------------------- + diff --git a/MurmurHash2.h b/MurmurHash2.h index 38dbbeb..32993c2 100644 --- a/MurmurHash2.h +++ b/MurmurHash2.h @@ -1,39 +1,39 @@ -//-----------------------------------------------------------------------------
-// MurmurHash2 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-#ifndef _MURMURHASH2_H_
-#define _MURMURHASH2_H_
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-typedef unsigned char uint8_t;
-typedef unsigned long uint32_t;
-typedef unsigned __int64 uint64_t;
-
-// Other compilers
-
-#else // defined(_MSC_VER)
-
-#include <stdint.h>
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-
-uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed );
-uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed );
-uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed );
-uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed );
-uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed );
-uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed );
-
-//-----------------------------------------------------------------------------
-
-#endif // _MURMURHASH2_H_
-
+//----------------------------------------------------------------------------- +// MurmurHash2 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +#ifndef _MURMURHASH2_H_ +#define _MURMURHASH2_H_ + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +typedef unsigned char uint8_t; +typedef unsigned long uint32_t; +typedef unsigned __int64 uint64_t; + +// Other compilers + +#else // defined(_MSC_VER) + +#include <stdint.h> + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed ); +uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed ); +uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed ); +uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed ); +uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed ); +uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed ); + +//----------------------------------------------------------------------------- + +#endif // _MURMURHASH2_H_ + diff --git a/MurmurHash3.cpp b/MurmurHash3.cpp index 0bf7386..09ffb26 100644 --- a/MurmurHash3.cpp +++ b/MurmurHash3.cpp @@ -1,335 +1,335 @@ -//-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-// Note - The x86 and x64 versions do _not_ produce the same results, as the
-// algorithms are optimized for their respective platforms. You can still
-// compile and run any of them on any platform, but your performance with the
-// non-native version will be less than optimal.
-
-#include "MurmurHash3.h"
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-#define FORCE_INLINE __forceinline
-
-#include <stdlib.h>
-
-#define ROTL32(x,y) _rotl(x,y)
-#define ROTL64(x,y) _rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x)
-
-// Other compilers
-
-#else // defined(_MSC_VER)
-
-#define FORCE_INLINE __attribute__((always_inline))
-
-inline uint32_t rotl32 ( uint32_t x, int8_t r )
-{
- return (x << r) | (x >> (32 - r));
-}
-
-inline uint64_t rotl64 ( uint64_t x, int8_t r )
-{
- return (x << r) | (x >> (64 - r));
-}
-
-#define ROTL32(x,y) rotl32(x,y)
-#define ROTL64(x,y) rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x##LLU)
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-// Block read - if your platform needs to do endian-swapping or can only
-// handle aligned reads, do the conversion here
-
-FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
-{
- return p[i];
-}
-
-FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
-{
- return p[i];
-}
-
-//-----------------------------------------------------------------------------
-// Finalization mix - force all bits of a hash block to avalanche
-
-FORCE_INLINE uint32_t fmix ( uint32_t h )
-{
- h ^= h >> 16;
- h *= 0x85ebca6b;
- h ^= h >> 13;
- h *= 0xc2b2ae35;
- h ^= h >> 16;
-
- return h;
-}
-
-//----------
-
-FORCE_INLINE uint64_t fmix ( uint64_t k )
-{
- k ^= k >> 33;
- k *= BIG_CONSTANT(0xff51afd7ed558ccd);
- k ^= k >> 33;
- k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
- k ^= k >> 33;
-
- return k;
-}
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_32 ( const void * key, int len,
- uint32_t seed, void * out )
-{
- const uint8_t * data = (const uint8_t*)key;
- const int nblocks = len / 4;
-
- uint32_t h1 = seed;
-
- uint32_t c1 = 0xcc9e2d51;
- uint32_t c2 = 0x1b873593;
-
- //----------
- // body
-
- const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
-
- for(int i = -nblocks; i; i++)
- {
- uint32_t k1 = getblock(blocks,i);
-
- k1 *= c1;
- k1 = ROTL32(k1,15);
- k1 *= c2;
-
- h1 ^= k1;
- h1 = ROTL32(h1,13);
- h1 = h1*5+0xe6546b64;
- }
-
- //----------
- // tail
-
- const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
-
- uint32_t k1 = 0;
-
- switch(len & 3)
- {
- case 3: k1 ^= tail[2] << 16;
- case 2: k1 ^= tail[1] << 8;
- case 1: k1 ^= tail[0];
- k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
- };
-
- //----------
- // finalization
-
- h1 ^= len;
-
- h1 = fmix(h1);
-
- *(uint32_t*)out = h1;
-}
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_128 ( const void * key, const int len,
- uint32_t seed, void * out )
-{
- const uint8_t * data = (const uint8_t*)key;
- const int nblocks = len / 16;
-
- uint32_t h1 = seed;
- uint32_t h2 = seed;
- uint32_t h3 = seed;
- uint32_t h4 = seed;
-
- uint32_t c1 = 0x239b961b;
- uint32_t c2 = 0xab0e9789;
- uint32_t c3 = 0x38b34ae5;
- uint32_t c4 = 0xa1e38b93;
-
- //----------
- // body
-
- const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
-
- for(int i = -nblocks; i; i++)
- {
- uint32_t k1 = getblock(blocks,i*4+0);
- uint32_t k2 = getblock(blocks,i*4+1);
- uint32_t k3 = getblock(blocks,i*4+2);
- uint32_t k4 = getblock(blocks,i*4+3);
-
- k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
-
- h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
-
- k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
-
- h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
-
- k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
-
- h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
-
- k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
-
- h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
- }
-
- //----------
- // tail
-
- const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
-
- uint32_t k1 = 0;
- uint32_t k2 = 0;
- uint32_t k3 = 0;
- uint32_t k4 = 0;
-
- switch(len & 15)
- {
- case 15: k4 ^= tail[14] << 16;
- case 14: k4 ^= tail[13] << 8;
- case 13: k4 ^= tail[12] << 0;
- k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
-
- case 12: k3 ^= tail[11] << 24;
- case 11: k3 ^= tail[10] << 16;
- case 10: k3 ^= tail[ 9] << 8;
- case 9: k3 ^= tail[ 8] << 0;
- k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
-
- case 8: k2 ^= tail[ 7] << 24;
- case 7: k2 ^= tail[ 6] << 16;
- case 6: k2 ^= tail[ 5] << 8;
- case 5: k2 ^= tail[ 4] << 0;
- k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
-
- case 4: k1 ^= tail[ 3] << 24;
- case 3: k1 ^= tail[ 2] << 16;
- case 2: k1 ^= tail[ 1] << 8;
- case 1: k1 ^= tail[ 0] << 0;
- k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
- };
-
- //----------
- // finalization
-
- h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
-
- h1 += h2; h1 += h3; h1 += h4;
- h2 += h1; h3 += h1; h4 += h1;
-
- h1 = fmix(h1);
- h2 = fmix(h2);
- h3 = fmix(h3);
- h4 = fmix(h4);
-
- h1 += h2; h1 += h3; h1 += h4;
- h2 += h1; h3 += h1; h4 += h1;
-
- ((uint32_t*)out)[0] = h1;
- ((uint32_t*)out)[1] = h2;
- ((uint32_t*)out)[2] = h3;
- ((uint32_t*)out)[3] = h4;
-}
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x64_128 ( const void * key, const int len,
- const uint32_t seed, void * out )
-{
- const uint8_t * data = (const uint8_t*)key;
- const int nblocks = len / 16;
-
- uint64_t h1 = seed;
- uint64_t h2 = seed;
-
- uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
- uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
-
- //----------
- // body
-
- const uint64_t * blocks = (const uint64_t *)(data);
-
- for(int i = 0; i < nblocks; i++)
- {
- uint64_t k1 = getblock(blocks,i*2+0);
- uint64_t k2 = getblock(blocks,i*2+1);
-
- k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
-
- h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
-
- k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
-
- h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
- }
-
- //----------
- // tail
-
- const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
-
- uint64_t k1 = 0;
- uint64_t k2 = 0;
-
- switch(len & 15)
- {
- case 15: k2 ^= uint64_t(tail[14]) << 48;
- case 14: k2 ^= uint64_t(tail[13]) << 40;
- case 13: k2 ^= uint64_t(tail[12]) << 32;
- case 12: k2 ^= uint64_t(tail[11]) << 24;
- case 11: k2 ^= uint64_t(tail[10]) << 16;
- case 10: k2 ^= uint64_t(tail[ 9]) << 8;
- case 9: k2 ^= uint64_t(tail[ 8]) << 0;
- k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
-
- case 8: k1 ^= uint64_t(tail[ 7]) << 56;
- case 7: k1 ^= uint64_t(tail[ 6]) << 48;
- case 6: k1 ^= uint64_t(tail[ 5]) << 40;
- case 5: k1 ^= uint64_t(tail[ 4]) << 32;
- case 4: k1 ^= uint64_t(tail[ 3]) << 24;
- case 3: k1 ^= uint64_t(tail[ 2]) << 16;
- case 2: k1 ^= uint64_t(tail[ 1]) << 8;
- case 1: k1 ^= uint64_t(tail[ 0]) << 0;
- k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
- };
-
- //----------
- // finalization
-
- h1 ^= len; h2 ^= len;
-
- h1 += h2;
- h2 += h1;
-
- h1 = fmix(h1);
- h2 = fmix(h2);
-
- h1 += h2;
- h2 += h1;
-
- ((uint64_t*)out)[0] = h1;
- ((uint64_t*)out)[1] = h2;
-}
-
-//-----------------------------------------------------------------------------
-
+//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +// Note - The x86 and x64 versions do _not_ produce the same results, as the +// algorithms are optimized for their respective platforms. You can still +// compile and run any of them on any platform, but your performance with the +// non-native version will be less than optimal. + +#include "MurmurHash3.h" + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +#define FORCE_INLINE __forceinline + +#include <stdlib.h> + +#define ROTL32(x,y) _rotl(x,y) +#define ROTL64(x,y) _rotl64(x,y) + +#define BIG_CONSTANT(x) (x) + +// Other compilers + +#else // defined(_MSC_VER) + +#define FORCE_INLINE __attribute__((always_inline)) + +inline uint32_t rotl32 ( uint32_t x, int8_t r ) +{ + return (x << r) | (x >> (32 - r)); +} + +inline uint64_t rotl64 ( uint64_t x, int8_t r ) +{ + return (x << r) | (x >> (64 - r)); +} + +#define ROTL32(x,y) rotl32(x,y) +#define ROTL64(x,y) rotl64(x,y) + +#define BIG_CONSTANT(x) (x##LLU) + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i ) +{ + return p[i]; +} + +FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i ) +{ + return p[i]; +} + +//----------------------------------------------------------------------------- +// Finalization mix - force all bits of a hash block to avalanche + +FORCE_INLINE uint32_t fmix ( uint32_t h ) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + 
+//---------- + +FORCE_INLINE uint64_t fmix ( uint64_t k ) +{ + k ^= k >> 33; + k *= BIG_CONSTANT(0xff51afd7ed558ccd); + k ^= k >> 33; + k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); + k ^= k >> 33; + + return k; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_32 ( const void * key, int len, + uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + uint32_t c1 = 0xcc9e2d51; + uint32_t c2 = 0x1b873593; + + //---------- + // body + + const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); + + for(int i = -nblocks; i; i++) + { + uint32_t k1 = getblock(blocks,i); + + k1 *= c1; + k1 = ROTL32(k1,15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1,13); + h1 = h1*5+0xe6546b64; + } + + //---------- + // tail + + const uint8_t * tail = (const uint8_t*)(data + nblocks*4); + + uint32_t k1 = 0; + + switch(len & 3) + { + case 3: k1 ^= tail[2] << 16; + case 2: k1 ^= tail[1] << 8; + case 1: k1 ^= tail[0]; + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + + h1 = fmix(h1); + + *(uint32_t*)out = h1; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_128 ( const void * key, const int len, + uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 16; + + uint32_t h1 = seed; + uint32_t h2 = seed; + uint32_t h3 = seed; + uint32_t h4 = seed; + + uint32_t c1 = 0x239b961b; + uint32_t c2 = 0xab0e9789; + uint32_t c3 = 0x38b34ae5; + uint32_t c4 = 0xa1e38b93; + + //---------- + // body + + const uint32_t * blocks = (const uint32_t *)(data + nblocks*16); + + for(int i = -nblocks; i; i++) + { + uint32_t k1 = getblock(blocks,i*4+0); + uint32_t k2 = getblock(blocks,i*4+1); + uint32_t k3 = getblock(blocks,i*4+2); + uint32_t k4 = getblock(blocks,i*4+3); + + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 
^= k1; + + h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b; + + k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; + + h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747; + + k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; + + h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35; + + k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; + + h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17; + } + + //---------- + // tail + + const uint8_t * tail = (const uint8_t*)(data + nblocks*16); + + uint32_t k1 = 0; + uint32_t k2 = 0; + uint32_t k3 = 0; + uint32_t k4 = 0; + + switch(len & 15) + { + case 15: k4 ^= tail[14] << 16; + case 14: k4 ^= tail[13] << 8; + case 13: k4 ^= tail[12] << 0; + k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; + + case 12: k3 ^= tail[11] << 24; + case 11: k3 ^= tail[10] << 16; + case 10: k3 ^= tail[ 9] << 8; + case 9: k3 ^= tail[ 8] << 0; + k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; + + case 8: k2 ^= tail[ 7] << 24; + case 7: k2 ^= tail[ 6] << 16; + case 6: k2 ^= tail[ 5] << 8; + case 5: k2 ^= tail[ 4] << 0; + k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; + + case 4: k1 ^= tail[ 3] << 24; + case 3: k1 ^= tail[ 2] << 16; + case 2: k1 ^= tail[ 1] << 8; + case 1: k1 ^= tail[ 0] << 0; + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + h1 = fmix(h1); + h2 = fmix(h2); + h3 = fmix(h3); + h4 = fmix(h4); + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + ((uint32_t*)out)[0] = h1; + ((uint32_t*)out)[1] = h2; + ((uint32_t*)out)[2] = h3; + ((uint32_t*)out)[3] = h4; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x64_128 ( const void * key, const int len, + const uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 16; + + uint64_t h1 = seed; + uint64_t h2 = seed; 
+ + uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); + uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); + + //---------- + // body + + const uint64_t * blocks = (const uint64_t *)(data); + + for(int i = 0; i < nblocks; i++) + { + uint64_t k1 = getblock(blocks,i*2+0); + uint64_t k2 = getblock(blocks,i*2+1); + + k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; + + h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; + + k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; + + h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; + } + + //---------- + // tail + + const uint8_t * tail = (const uint8_t*)(data + nblocks*16); + + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch(len & 15) + { + case 15: k2 ^= uint64_t(tail[14]) << 48; + case 14: k2 ^= uint64_t(tail[13]) << 40; + case 13: k2 ^= uint64_t(tail[12]) << 32; + case 12: k2 ^= uint64_t(tail[11]) << 24; + case 11: k2 ^= uint64_t(tail[10]) << 16; + case 10: k2 ^= uint64_t(tail[ 9]) << 8; + case 9: k2 ^= uint64_t(tail[ 8]) << 0; + k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; + + case 8: k1 ^= uint64_t(tail[ 7]) << 56; + case 7: k1 ^= uint64_t(tail[ 6]) << 48; + case 6: k1 ^= uint64_t(tail[ 5]) << 40; + case 5: k1 ^= uint64_t(tail[ 4]) << 32; + case 4: k1 ^= uint64_t(tail[ 3]) << 24; + case 3: k1 ^= uint64_t(tail[ 2]) << 16; + case 2: k1 ^= uint64_t(tail[ 1]) << 8; + case 1: k1 ^= uint64_t(tail[ 0]) << 0; + k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = fmix(h1); + h2 = fmix(h2); + + h1 += h2; + h2 += h1; + + ((uint64_t*)out)[0] = h1; + ((uint64_t*)out)[1] = h2; +} + +//----------------------------------------------------------------------------- + diff --git a/MurmurHash3.h b/MurmurHash3.h index 58e9820..54e9d3f 100644 --- a/MurmurHash3.h +++ b/MurmurHash3.h @@ -1,37 +1,37 @@ -//-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-#ifndef _MURMURHASH3_H_
-#define _MURMURHASH3_H_
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-typedef unsigned char uint8_t;
-typedef unsigned long uint32_t;
-typedef unsigned __int64 uint64_t;
-
-// Other compilers
-
-#else // defined(_MSC_VER)
-
-#include <stdint.h>
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );
-
-void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
-
-void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
-
-//-----------------------------------------------------------------------------
-
-#endif // _MURMURHASH3_H_
+//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +#ifndef _MURMURHASH3_H_ +#define _MURMURHASH3_H_ + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +typedef unsigned char uint8_t; +typedef unsigned long uint32_t; +typedef unsigned __int64 uint64_t; + +// Other compilers + +#else // defined(_MSC_VER) + +#include <stdint.h> + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out ); + +void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out ); + +void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out ); + +//----------------------------------------------------------------------------- + +#endif // _MURMURHASH3_H_ diff --git a/Platform.cpp b/Platform.cpp index d90dab8..d7f5fb8 100644 --- a/Platform.cpp +++ b/Platform.cpp @@ -1,42 +1,42 @@ -#include "Platform.h"
-
-#include <stdio.h>
-
-void testRDTSC ( void )
-{
- int64_t temp = rdtsc();
-
- printf("%d",(int)temp);
-}
-
-#if defined(_MSC_VER)
-
-#include <windows.h>
-
-void SetAffinity ( int cpu )
-{
- SetProcessAffinityMask(GetCurrentProcess(),cpu);
- SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
-}
-
-#else
-
-#include <sched.h>
-
-void SetAffinity ( int /*cpu*/ )
-{
-#ifndef __CYGWIN__
- cpu_set_t mask;
-
- CPU_ZERO(&mask);
-
- CPU_SET(2,&mask);
-
- if( sched_setaffinity(0,sizeof(mask),&mask) == -1)
- {
- printf("WARNING: Could not set CPU affinity\n");
- }
-#endif
-}
-
-#endif
+#include "Platform.h" + +#include <stdio.h> + +void testRDTSC ( void ) +{ + int64_t temp = rdtsc(); + + printf("%d",(int)temp); +} + +#if defined(_MSC_VER) + +#include <windows.h> + +void SetAffinity ( int cpu ) +{ + SetProcessAffinityMask(GetCurrentProcess(),cpu); + SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST); +} + +#else + +#include <sched.h> + +void SetAffinity ( int /*cpu*/ ) +{ +#ifndef __CYGWIN__ + cpu_set_t mask; + + CPU_ZERO(&mask); + + CPU_SET(2,&mask); + + if( sched_setaffinity(0,sizeof(mask),&mask) == -1) + { + printf("WARNING: Could not set CPU affinity\n"); + } +#endif +} + +#endif @@ -1,85 +1,94 @@ -//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-#pragma once
-
-void SetAffinity ( int cpu );
-
-//-----------------------------------------------------------------------------
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-#define FORCE_INLINE __forceinline
-#define NEVER_INLINE __declspec(noinline)
-
-#include <stdlib.h>
-#include <math.h> // Has to be included before intrin.h or VC complains about 'ceil'
-#include <intrin.h> // for __rdtsc
-#include "pstdint.h"
-
-#define ROTL32(x,y) _rotl(x,y)
-#define ROTL64(x,y) _rotl64(x,y)
-#define ROTR32(x,y) _rotr(x,y)
-#define ROTR64(x,y) _rotr64(x,y)
-
-#pragma warning(disable : 4127) // "conditional expression is constant" in the if()s for avalanchetest
-#pragma warning(disable : 4100)
-#pragma warning(disable : 4702)
-
-#define BIG_CONSTANT(x) (x)
-
-// RDTSC == Read Time Stamp Counter
-
-#define rdtsc() __rdtsc()
-
-//-----------------------------------------------------------------------------
-// Other compilers
-
-#else // defined(_MSC_VER)
-
-#include <stdint.h>
-
-#define FORCE_INLINE __attribute__((always_inline))
-#define NEVER_INLINE __attribute__((noinline))
-
-inline uint32_t rotl32 ( uint32_t x, int8_t r )
-{
- return (x << r) | (x >> (32 - r));
-}
-
-inline uint64_t rotl64 ( uint64_t x, int8_t r )
-{
- return (x << r) | (x >> (64 - r));
-}
-
-inline uint32_t rotr32 ( uint32_t x, int8_t r )
-{
- return (x >> r) | (x << (32 - r));
-}
-
-inline uint64_t rotr64 ( uint64_t x, int8_t r )
-{
- return (x >> r) | (x << (64 - r));
-}
-
-#define ROTL32(x,y) rotl32(x,y)
-#define ROTL64(x,y) rotl64(x,y)
-#define ROTR32(x,y) rotr32(x,y)
-#define ROTR64(x,y) rotr64(x,y)
-
-#define BIG_CONSTANT(x) (x##LLU)
-
-__inline__ unsigned long long int rdtsc()
-{
- unsigned long long int x;
- __asm__ volatile ("rdtsc" : "=A" (x));
- return x;
-}
-
-#include <strings.h>
-#define _stricmp strcasecmp
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
+//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +#pragma once + +void SetAffinity ( int cpu ); + +//----------------------------------------------------------------------------- +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +#define FORCE_INLINE __forceinline +#define NEVER_INLINE __declspec(noinline) + +#include <stdlib.h> +#include <math.h> // Has to be included before intrin.h or VC complains about 'ceil' +#include <intrin.h> // for __rdtsc +#include "pstdint.h" + +#define ROTL32(x,y) _rotl(x,y) +#define ROTL64(x,y) _rotl64(x,y) +#define ROTR32(x,y) _rotr(x,y) +#define ROTR64(x,y) _rotr64(x,y) + +#pragma warning(disable : 4127) // "conditional expression is constant" in the if()s for avalanchetest +#pragma warning(disable : 4100) +#pragma warning(disable : 4702) + +#define BIG_CONSTANT(x) (x) + +// RDTSC == Read Time Stamp Counter + +#define rdtsc() __rdtsc() + +//----------------------------------------------------------------------------- +// Other compilers + +#else // defined(_MSC_VER) + +#include <stdint.h> + +#define FORCE_INLINE __attribute__((always_inline)) +#define NEVER_INLINE __attribute__((noinline)) + +inline uint32_t rotl32 ( uint32_t x, int8_t r ) +{ + return (x << r) | (x >> (32 - r)); +} + +inline uint64_t rotl64 ( uint64_t x, int8_t r ) +{ + return (x << r) | (x >> (64 - r)); +} + +inline uint32_t rotr32 ( uint32_t x, int8_t r ) +{ + return (x >> r) | (x << (32 - r)); +} + +inline uint64_t rotr64 ( uint64_t x, int8_t r ) +{ + return (x >> r) | (x << (64 - r)); +} + +#define ROTL32(x,y) rotl32(x,y) +#define ROTL64(x,y) rotl64(x,y) +#define ROTR32(x,y) rotr32(x,y) +#define ROTR64(x,y) rotr64(x,y) + +#define BIG_CONSTANT(x) (x##LLU) + +__inline__ unsigned long long int rdtsc() +{ +#ifdef __x86_64__ + unsigned int a, d; + __asm__ volatile ("rdtsc" : "=a" (a), "=d" (d)); + return (unsigned long)a | ((unsigned long)d << 32); +#else +#ifndef __i386__ +#error Must 
be x86 either 32-bit or 64-bit. +#endif + unsigned long long int x; + __asm__ volatile ("rdtsc" : "=A" (x)); + return x; +#endif +} + +#include <strings.h> +#define _stricmp strcasecmp + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- @@ -1,8 +1,8 @@ -#include "Random.h"
-
-Rand g_rand1(1);
-Rand g_rand2(2);
-Rand g_rand3(3);
-Rand g_rand4(4);
-
-//-----------------------------------------------------------------------------
+#include "Random.h" + +Rand g_rand1(1); +Rand g_rand2(2); +Rand g_rand3(3); +Rand g_rand4(4); + +//----------------------------------------------------------------------------- @@ -1,117 +1,117 @@ -#pragma once
-
-#include "Types.h"
-
-//-----------------------------------------------------------------------------
-// Xorshift RNG based on code by George Marsaglia
-// http://en.wikipedia.org/wiki/Xorshift
-
-struct Rand
-{
- uint32_t x;
- uint32_t y;
- uint32_t z;
- uint32_t w;
-
- Rand()
- {
- reseed(uint32_t(0));
- }
-
- Rand( uint32_t seed )
- {
- reseed(seed);
- }
-
- void reseed ( uint32_t seed )
- {
- x = 0x498b3bc5 ^ seed;
- y = 0;
- z = 0;
- w = 0;
-
- for(int i = 0; i < 10; i++) mix();
- }
-
- void reseed ( uint64_t seed )
- {
- x = 0x498b3bc5 ^ (uint32_t)(seed >> 0);
- y = 0x5a05089a ^ (uint32_t)(seed >> 32);
- z = 0;
- w = 0;
-
- for(int i = 0; i < 10; i++) mix();
- }
-
- //-----------------------------------------------------------------------------
-
- void mix ( void )
- {
- uint32_t t = x ^ (x << 11);
- x = y; y = z; z = w;
- w = w ^ (w >> 19) ^ t ^ (t >> 8);
- }
-
- uint32_t rand_u32 ( void )
- {
- mix();
-
- return x;
- }
-
- uint64_t rand_u64 ( void )
- {
- mix();
-
- uint64_t a = x;
- uint64_t b = y;
-
- return (a << 32) | b;
- }
-
- void rand_p ( void * blob, int bytes )
- {
- uint32_t * blocks = reinterpret_cast<uint32_t*>(blob);
-
- while(bytes >= 4)
- {
- blocks[0] = rand_u32();
- blocks++;
- bytes -= 4;
- }
-
- uint8_t * tail = reinterpret_cast<uint8_t*>(blocks);
-
- for(int i = 0; i < bytes; i++)
- {
- tail[i] = (uint8_t)rand_u32();
- }
- }
-};
-
-//-----------------------------------------------------------------------------
-
-extern Rand g_rand1;
-
-inline uint32_t rand_u32 ( void ) { return g_rand1.rand_u32(); }
-inline uint64_t rand_u64 ( void ) { return g_rand1.rand_u64(); }
-
-inline void rand_p ( void * blob, int bytes )
-{
- uint32_t * blocks = (uint32_t*)blob;
-
- while(bytes >= 4)
- {
- *blocks++ = rand_u32();
- bytes -= 4;
- }
-
- uint8_t * tail = (uint8_t*)blocks;
-
- for(int i = 0; i < bytes; i++)
- {
- tail[i] = (uint8_t)rand_u32();
- }
-}
-
-//-----------------------------------------------------------------------------
+#pragma once + +#include "Types.h" + +//----------------------------------------------------------------------------- +// Xorshift RNG based on code by George Marsaglia +// http://en.wikipedia.org/wiki/Xorshift + +struct Rand +{ + uint32_t x; + uint32_t y; + uint32_t z; + uint32_t w; + + Rand() + { + reseed(uint32_t(0)); + } + + Rand( uint32_t seed ) + { + reseed(seed); + } + + void reseed ( uint32_t seed ) + { + x = 0x498b3bc5 ^ seed; + y = 0; + z = 0; + w = 0; + + for(int i = 0; i < 10; i++) mix(); + } + + void reseed ( uint64_t seed ) + { + x = 0x498b3bc5 ^ (uint32_t)(seed >> 0); + y = 0x5a05089a ^ (uint32_t)(seed >> 32); + z = 0; + w = 0; + + for(int i = 0; i < 10; i++) mix(); + } + + //----------------------------------------------------------------------------- + + void mix ( void ) + { + uint32_t t = x ^ (x << 11); + x = y; y = z; z = w; + w = w ^ (w >> 19) ^ t ^ (t >> 8); + } + + uint32_t rand_u32 ( void ) + { + mix(); + + return x; + } + + uint64_t rand_u64 ( void ) + { + mix(); + + uint64_t a = x; + uint64_t b = y; + + return (a << 32) | b; + } + + void rand_p ( void * blob, int bytes ) + { + uint32_t * blocks = reinterpret_cast<uint32_t*>(blob); + + while(bytes >= 4) + { + blocks[0] = rand_u32(); + blocks++; + bytes -= 4; + } + + uint8_t * tail = reinterpret_cast<uint8_t*>(blocks); + + for(int i = 0; i < bytes; i++) + { + tail[i] = (uint8_t)rand_u32(); + } + } +}; + +//----------------------------------------------------------------------------- + +extern Rand g_rand1; + +inline uint32_t rand_u32 ( void ) { return g_rand1.rand_u32(); } +inline uint64_t rand_u64 ( void ) { return g_rand1.rand_u64(); } + +inline void rand_p ( void * blob, int bytes ) +{ + uint32_t * blocks = (uint32_t*)blob; + + while(bytes >= 4) + { + *blocks++ = rand_u32(); + bytes -= 4; + } + + uint8_t * tail = (uint8_t*)blocks; + + for(int i = 0; i < bytes; i++) + { + tail[i] = (uint8_t)rand_u32(); + } +} + 
+//----------------------------------------------------------------------------- diff --git a/SpeedTest.cpp b/SpeedTest.cpp index 2265389..d91f6e4 100644 --- a/SpeedTest.cpp +++ b/SpeedTest.cpp @@ -1,242 +1,242 @@ -#include "SpeedTest.h"
-
-#include "Random.h"
-
-#include <stdio.h> // for printf
-#include <memory.h> // for memset
-#include <math.h> // for sqrt
-#include <algorithm> // for sort
-
-//-----------------------------------------------------------------------------
-// We view our timing values as a series of random variables V that has been
-// contaminated with occasional outliers due to cache misses, thread
-// preemption, etcetera. To filter out the outliers, we search for the largest
-// subset of V such that all its values are within three standard deviations
-// of the mean.
-
-double CalcMean ( std::vector<double> & v )
-{
- double mean = 0;
-
- for(int i = 0; i < (int)v.size(); i++)
- {
- mean += v[i];
- }
-
- mean /= double(v.size());
-
- return mean;
-}
-
-double CalcMean ( std::vector<double> & v, int a, int b )
-{
- double mean = 0;
-
- for(int i = a; i <= b; i++)
- {
- mean += v[i];
- }
-
- mean /= (b-a+1);
-
- return mean;
-}
-
-double CalcStdv ( std::vector<double> & v, int a, int b )
-{
- double mean = CalcMean(v,a,b);
-
- double stdv = 0;
-
- for(int i = a; i <= b; i++)
- {
- double x = v[i] - mean;
-
- stdv += x*x;
- }
-
- stdv = sqrt(stdv / (b-a+1));
-
- return stdv;
-}
-
-// Return true if the largest value in v[0,len) is more than three
-// standard deviations from the mean
-
-bool ContainsOutlier ( std::vector<double> & v, size_t len )
-{
- double mean = 0;
-
- for(size_t i = 0; i < len; i++)
- {
- mean += v[i];
- }
-
- mean /= double(len);
-
- double stdv = 0;
-
- for(size_t i = 0; i < len; i++)
- {
- double x = v[i] - mean;
- stdv += x*x;
- }
-
- stdv = sqrt(stdv / double(len));
-
- double cutoff = mean + stdv*3;
-
- return v[len-1] > cutoff;
-}
-
-// Do a binary search to find the largest subset of v that does not contain
-// outliers.
-
-void FilterOutliers ( std::vector<double> & v )
-{
- std::sort(v.begin(),v.end());
-
- size_t len = 0;
-
- for(size_t x = 0x40000000; x; x = x >> 1 )
- {
- if((len | x) >= v.size()) continue;
-
- if(!ContainsOutlier(v,len | x))
- {
- len |= x;
- }
- }
-
- v.resize(len);
-}
-
-// Iteratively tighten the set to find a subset that does not contain
-// outliers. I'm not positive this works correctly in all cases.
-
-void FilterOutliers2 ( std::vector<double> & v )
-{
- std::sort(v.begin(),v.end());
-
- int a = 0;
- int b = (int)(v.size() - 1);
-
- for(int i = 0; i < 10; i++)
- {
- //printf("%d %d\n",a,b);
-
- double mean = CalcMean(v,a,b);
- double stdv = CalcStdv(v,a,b);
-
- double cutA = mean - stdv*3;
- double cutB = mean + stdv*3;
-
- while((a < b) && (v[a] < cutA)) a++;
- while((b > a) && (v[b] > cutB)) b--;
- }
-
- std::vector<double> v2;
-
- v2.insert(v2.begin(),v.begin()+a,v.begin()+b+1);
-
- v.swap(v2);
-}
-
-//-----------------------------------------------------------------------------
-// We really want the rdtsc() calls to bracket the function call as tightly
-// as possible, but that's hard to do portably. We'll try and get as close as
-// possible by marking the function as NEVER_INLINE (to keep the optimizer from
-// moving it) and marking the timing variables as "volatile register".
-
-NEVER_INLINE int64_t timehash ( pfHash hash, const void * key, int len, int seed )
-{
- volatile register int64_t begin,end;
-
- uint32_t temp[16];
-
- begin = rdtsc();
-
- hash(key,len,seed,temp);
-
- end = rdtsc();
-
- return end-begin;
-}
-
-//-----------------------------------------------------------------------------
-
-double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int blocksize, const int align )
-{
- Rand r(seed);
-
- uint8_t * buf = new uint8_t[blocksize + 512];
-
- uint64_t t1 = reinterpret_cast<uint64_t>(buf);
-
- t1 = (t1 + 255) & BIG_CONSTANT(0xFFFFFFFFFFFFFF00);
- t1 += align;
-
- uint8_t * block = reinterpret_cast<uint8_t*>(t1);
-
- r.rand_p(block,blocksize);
-
- //----------
-
- std::vector<double> times;
- times.reserve(trials);
-
- for(int itrial = 0; itrial < trials; itrial++)
- {
- r.rand_p(block,blocksize);
-
- double t = (double)timehash(hash,block,blocksize,itrial);
-
- if(t > 0) times.push_back(t);
- }
-
- //----------
-
- std::sort(times.begin(),times.end());
-
- FilterOutliers(times);
-
- delete [] buf;
-
- return CalcMean(times);
-}
-
-//-----------------------------------------------------------------------------
-// 256k blocks seem to give the best results.
-
-void BulkSpeedTest ( pfHash hash, uint32_t seed )
-{
- const int trials = 2999;
- const int blocksize = 256 * 1024;
-
- printf("Bulk speed test - %d-byte keys\n",blocksize);
-
- for(int align = 0; align < 8; align++)
- {
- double cycles = SpeedTest(hash,seed,trials,blocksize,align);
-
- double bestbpc = double(blocksize)/cycles;
-
- double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
- printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps);
- }
-}
-
-//-----------------------------------------------------------------------------
-
-void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & /*outCycles*/ )
-{
- const int trials = 999999;
-
- if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
-
- double cycles = SpeedTest(hash,seed,trials,keysize,0);
-
- printf("%8.2f cycles/hash\n",cycles);
-}
-
-//-----------------------------------------------------------------------------
+#include "SpeedTest.h" + +#include "Random.h" + +#include <stdio.h> // for printf +#include <memory.h> // for memset +#include <math.h> // for sqrt +#include <algorithm> // for sort + +//----------------------------------------------------------------------------- +// We view our timing values as a series of random variables V that has been +// contaminated with occasional outliers due to cache misses, thread +// preemption, etcetera. To filter out the outliers, we search for the largest +// subset of V such that all its values are within three standard deviations +// of the mean. + +double CalcMean ( std::vector<double> & v ) +{ + double mean = 0; + + for(int i = 0; i < (int)v.size(); i++) + { + mean += v[i]; + } + + mean /= double(v.size()); + + return mean; +} + +double CalcMean ( std::vector<double> & v, int a, int b ) +{ + double mean = 0; + + for(int i = a; i <= b; i++) + { + mean += v[i]; + } + + mean /= (b-a+1); + + return mean; +} + +double CalcStdv ( std::vector<double> & v, int a, int b ) +{ + double mean = CalcMean(v,a,b); + + double stdv = 0; + + for(int i = a; i <= b; i++) + { + double x = v[i] - mean; + + stdv += x*x; + } + + stdv = sqrt(stdv / (b-a+1)); + + return stdv; +} + +// Return true if the largest value in v[0,len) is more than three +// standard deviations from the mean + +bool ContainsOutlier ( std::vector<double> & v, size_t len ) +{ + double mean = 0; + + for(size_t i = 0; i < len; i++) + { + mean += v[i]; + } + + mean /= double(len); + + double stdv = 0; + + for(size_t i = 0; i < len; i++) + { + double x = v[i] - mean; + stdv += x*x; + } + + stdv = sqrt(stdv / double(len)); + + double cutoff = mean + stdv*3; + + return v[len-1] > cutoff; +} + +// Do a binary search to find the largest subset of v that does not contain +// outliers. 
+ +void FilterOutliers ( std::vector<double> & v ) +{ + std::sort(v.begin(),v.end()); + + size_t len = 0; + + for(size_t x = 0x40000000; x; x = x >> 1 ) + { + if((len | x) >= v.size()) continue; + + if(!ContainsOutlier(v,len | x)) + { + len |= x; + } + } + + v.resize(len); +} + +// Iteratively tighten the set to find a subset that does not contain +// outliers. I'm not positive this works correctly in all cases. + +void FilterOutliers2 ( std::vector<double> & v ) +{ + std::sort(v.begin(),v.end()); + + int a = 0; + int b = (int)(v.size() - 1); + + for(int i = 0; i < 10; i++) + { + //printf("%d %d\n",a,b); + + double mean = CalcMean(v,a,b); + double stdv = CalcStdv(v,a,b); + + double cutA = mean - stdv*3; + double cutB = mean + stdv*3; + + while((a < b) && (v[a] < cutA)) a++; + while((b > a) && (v[b] > cutB)) b--; + } + + std::vector<double> v2; + + v2.insert(v2.begin(),v.begin()+a,v.begin()+b+1); + + v.swap(v2); +} + +//----------------------------------------------------------------------------- +// We really want the rdtsc() calls to bracket the function call as tightly +// as possible, but that's hard to do portably. We'll try and get as close as +// possible by marking the function as NEVER_INLINE (to keep the optimizer from +// moving it) and marking the timing variables as "volatile register". 
+ +NEVER_INLINE int64_t timehash ( pfHash hash, const void * key, int len, int seed ) +{ + volatile register int64_t begin,end; + + uint32_t temp[16]; + + begin = rdtsc(); + + hash(key,len,seed,temp); + + end = rdtsc(); + + return end-begin; +} + +//----------------------------------------------------------------------------- + +double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int blocksize, const int align ) +{ + Rand r(seed); + + uint8_t * buf = new uint8_t[blocksize + 512]; + + uint64_t t1 = reinterpret_cast<uint64_t>(buf); + + t1 = (t1 + 255) & BIG_CONSTANT(0xFFFFFFFFFFFFFF00); + t1 += align; + + uint8_t * block = reinterpret_cast<uint8_t*>(t1); + + r.rand_p(block,blocksize); + + //---------- + + std::vector<double> times; + times.reserve(trials); + + for(int itrial = 0; itrial < trials; itrial++) + { + r.rand_p(block,blocksize); + + double t = (double)timehash(hash,block,blocksize,itrial); + + if(t > 0) times.push_back(t); + } + + //---------- + + std::sort(times.begin(),times.end()); + + FilterOutliers(times); + + delete [] buf; + + return CalcMean(times); +} + +//----------------------------------------------------------------------------- +// 256k blocks seem to give the best results. 
+ +void BulkSpeedTest ( pfHash hash, uint32_t seed ) +{ + const int trials = 2999; + const int blocksize = 256 * 1024; + + printf("Bulk speed test - %d-byte keys\n",blocksize); + + for(int align = 0; align < 8; align++) + { + double cycles = SpeedTest(hash,seed,trials,blocksize,align); + + double bestbpc = double(blocksize)/cycles; + + double bestbps = (bestbpc * 3000000000.0 / 1048576.0); + printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps); + } +} + +//----------------------------------------------------------------------------- + +void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & /*outCycles*/ ) +{ + const int trials = 999999; + + if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize); + + double cycles = SpeedTest(hash,seed,trials,keysize,0); + + printf("%8.2f cycles/hash\n",cycles); +} + +//----------------------------------------------------------------------------- diff --git a/SpeedTest.h b/SpeedTest.h index b881a78..7bd2167 100644 --- a/SpeedTest.h +++ b/SpeedTest.h @@ -1,8 +1,8 @@ -#pragma once
-
-#include "Types.h"
-
-void BulkSpeedTest ( pfHash hash, uint32_t seed );
-void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & outCycles );
-
-//-----------------------------------------------------------------------------
+#pragma once + +#include "Types.h" + +void BulkSpeedTest ( pfHash hash, uint32_t seed ); +void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & outCycles ); + +//----------------------------------------------------------------------------- diff --git a/Spooky.cpp b/Spooky.cpp new file mode 100644 index 0000000..47f5d75 --- /dev/null +++ b/Spooky.cpp @@ -0,0 +1,347 @@ +// Spooky Hash +// A 128-bit noncryptographic hash, for checksums and table lookup +// By Bob Jenkins. Public domain. +// Oct 31 2010: published framework, disclaimer ShortHash isn't right +// Nov 7 2010: disabled ShortHash +// Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again + +#include <memory.h> +#include "Spooky.h" + +#define ALLOW_UNALIGNED_READS 1 + +// +// short hash ... it could be used on any message, +// but it's used by Spooky just for short messages. +// +void SpookyHash::Short( + const void *message, + size_t length, + uint64 *hash1, + uint64 *hash2) +{ + uint64 buf[sc_numVars]; + union + { + const uint8 *p8; + uint32 *p32; + uint64 *p64; + size_t i; + } u; + + u.p8 = (const uint8 *)message; + + if (!ALLOW_UNALIGNED_READS && (u.i & 0x7)) + { + memcpy(buf, message, length); + u.p64 = buf; + } + + size_t remainder = length%32; + uint64 a=*hash1; + uint64 b=*hash2; + uint64 c=sc_const; + uint64 d=sc_const; + + if (length > 15) + { + const uint64 *end = u.p64 + (length/32)*4; + + // handle all complete sets of 32 bytes + for (; u.p64 < end; u.p64 += 4) + { + c += u.p64[0]; + d += u.p64[1]; + ShortMix(a,b,c,d); + a += u.p64[2]; + b += u.p64[3]; + } + + //Handle the case of 16+ remaining bytes. 
+ if (remainder >= 16) + { + c += u.p64[0]; + d += u.p64[1]; + ShortMix(a,b,c,d); + u.p64 += 2; + remainder -= 16; + } + } + + // Handle the last 0..15 bytes, and its length + d = ((uint64)length) << 56; + switch (remainder) + { + case 15: + d += ((uint64)u.p8[14]) << 48; + case 14: + d += ((uint64)u.p8[13]) << 40; + case 13: + d += ((uint64)u.p8[12]) << 32; + case 12: + d += u.p32[2]; + c += u.p64[0]; + break; + case 11: + d += ((uint64)u.p8[10]) << 16; + case 10: + d += ((uint64)u.p8[9]) << 8; + case 9: + d += (uint64)u.p8[8]; + case 8: + c += u.p64[0]; + break; + case 7: + c += ((uint64)u.p8[6]) << 48; + case 6: + c += ((uint64)u.p8[5]) << 40; + case 5: + c += ((uint64)u.p8[4]) << 32; + case 4: + c += u.p32[0]; + break; + case 3: + c += ((uint64)u.p8[2]) << 16; + case 2: + c += ((uint64)u.p8[1]) << 8; + case 1: + c += (uint64)u.p8[0]; + break; + case 0: + c += sc_const; + d += sc_const; + } + ShortEnd(a,b,c,d); + *hash1 = a; + *hash2 = b; +} + + + + +// do the whole hash in one call +void SpookyHash::Hash128( + const void *message, + size_t length, + uint64 *hash1, + uint64 *hash2) +{ + if (length < sc_bufSize) + { + Short(message, length, hash1, hash2); + return; + } + + uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; + uint64 buf[sc_numVars]; + uint64 *end; + union + { + const uint8 *p8; + uint64 *p64; + size_t i; + } u; + size_t remainder; + + h0=h3=h6=h9 = *hash1; + h1=h4=h7=h10 = *hash2; + h2=h5=h8=h11 = sc_const; + + u.p8 = (const uint8 *)message; + end = u.p64 + (length/sc_blockSize)*sc_numVars; + + // handle all whole sc_blockSize blocks of bytes + if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0)) + { + while (u.p64 < end) + { + Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p64 += sc_numVars; + } + } + else + { + while (u.p64 < end) + { + memcpy(buf, u.p64, sc_blockSize); + Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p64 += sc_numVars; + } + } + + // handle the last partial block of sc_blockSize bytes + remainder = (length - ((const uint8 
*)end-(const uint8 *)message)); + memcpy(buf, end, remainder); + memset(((uint8 *)buf)+remainder, 0, sc_blockSize-remainder); + ((uint8 *)buf)[sc_blockSize-1] = remainder; + Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + + // do some final mixing + End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + *hash1 = h0; + *hash2 = h1; +} + + + +// init spooky state +void SpookyHash::Init(uint64 seed1, uint64 seed2) +{ + m_length = 0; + m_remainder = 0; + m_state[0] = seed1; + m_state[1] = seed2; +} + + +// add a message fragment to the state +void SpookyHash::Update(const void *message, size_t length) +{ + uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; + size_t newLength = length + m_remainder; + uint8 remainder; + union + { + const uint8 *p8; + uint64 *p64; + size_t i; + } u; + const uint64 *end; + + // Is this message fragment too short? If it is, stuff it away. + if (newLength < sc_bufSize) + { + memcpy(&((uint8 *)m_data)[m_remainder], message, length); + m_length = length + m_length; + m_remainder = (uint8)newLength; + return; + } + + // init the variables + if (m_length < sc_bufSize) + { + h0=h3=h6=h9 = m_state[0]; + h1=h4=h7=h10 = m_state[1]; + h2=h5=h8=h11 = sc_const; + } + else + { + h0 = m_state[0]; + h1 = m_state[1]; + h2 = m_state[2]; + h3 = m_state[3]; + h4 = m_state[4]; + h5 = m_state[5]; + h6 = m_state[6]; + h7 = m_state[7]; + h8 = m_state[8]; + h9 = m_state[9]; + h10 = m_state[10]; + h11 = m_state[11]; + } + m_length = length + m_length; + + // if we've got anything stuffed away, use it now + if (m_remainder) + { + uint8 prefix = sc_bufSize-m_remainder; + memcpy(&(((uint8 *)m_data)[m_remainder]), message, prefix); + u.p64 = m_data; + Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p8 = ((const uint8 *)message) + prefix; + length -= prefix; + } + else + { + u.p8 = (const uint8 *)message; + } + + // handle all whole blocks of sc_blockSize bytes + end = u.p64 + 
(length/sc_blockSize)*sc_numVars; + remainder = (uint8)(length-((const uint8 *)end-u.p8)); + if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0) + { + while (u.p64 < end) + { + Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p64 += sc_numVars; + } + } + else + { + while (u.p64 < end) + { + memcpy(m_data, u.p8, sc_blockSize); + Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p64 += sc_numVars; + } + } + + // stuff away the last few bytes + m_remainder = remainder; + memcpy(m_data, end, remainder); + + // stuff away the variables + m_state[0] = h0; + m_state[1] = h1; + m_state[2] = h2; + m_state[3] = h3; + m_state[4] = h4; + m_state[5] = h5; + m_state[6] = h6; + m_state[7] = h7; + m_state[8] = h8; + m_state[9] = h9; + m_state[10] = h10; + m_state[11] = h11; +} + + +// report the hash for the concatenation of all message fragments so far +void SpookyHash::Final(uint64 *hash1, uint64 *hash2) +{ + // init the variables + if (m_length < sc_bufSize) + { + Short( m_data, m_length, hash1, hash2); + return; + } + + const uint64 *data = (const uint64 *)m_data; + uint8 remainder = m_remainder; + + uint64 h0 = m_state[0]; + uint64 h1 = m_state[1]; + uint64 h2 = m_state[2]; + uint64 h3 = m_state[3]; + uint64 h4 = m_state[4]; + uint64 h5 = m_state[5]; + uint64 h6 = m_state[6]; + uint64 h7 = m_state[7]; + uint64 h8 = m_state[8]; + uint64 h9 = m_state[9]; + uint64 h10 = m_state[10]; + uint64 h11 = m_state[11]; + + if (remainder >= sc_blockSize) + { + // m_data can contain two blocks; handle any whole first block + Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + data += sc_numVars; + remainder -= sc_blockSize; + } + + // mix in the last partial block, and the length mod sc_blockSize + memset(&((uint8 *)data)[remainder], 0, (sc_blockSize-remainder)); + + ((uint8 *)data)[sc_blockSize-1] = remainder; + Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + + // do some final mixing + End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + + *hash1 = h0; + *hash2 = h1; +} + diff --git 
a/Spooky.h b/Spooky.h new file mode 100644 index 0000000..cafd52e --- /dev/null +++ b/Spooky.h @@ -0,0 +1,293 @@ +// +// SpookyHash: a 128-bit noncryptographic hash function +// By Bob Jenkins, public domain +// Oct 31 2010: alpha, framework + SpookyHash::Mix appears right +// Oct 31 2011: alpha again, Mix only good to 2^^69 but rest appears right +// Dec 31 2011: beta, improved Mix, tested it for 2-bit deltas +// Feb 2 2012: production, same bits as beta +// Feb 5 2012: adjusted definitions of uint* to be more portable +// +// Up to 4 bytes/cycle for long messages. Reasonably fast for short messages. +// All 1 or 2 bit deltas achieve avalanche within 1% bias per output bit. +// +// This was developed for and tested on 64-bit x86-compatible processors. +// It assumes the processor is little-endian. There is a macro +// controlling whether unaligned reads are allowed (by default they are). +// This should be an equally good hash on big-endian machines, but it will +// compute different results on them than on little-endian machines. +// +// Google's CityHash has similar specs to SpookyHash, and CityHash is faster +// on some platforms. MD4 and MD5 also have similar specs, but they are orders +// of magnitude slower. CRCs are two or more times slower, but unlike +// SpookyHash, they have nice math for combining the CRCs of pieces to form +// the CRCs of wholes. There are also cryptographic hashes, but those are even +// slower than MD5. 
+// + +#include <stddef.h> + +#ifdef _MSC_VER +# define INLINE __forceinline + typedef unsigned __int64 uint64; + typedef unsigned __int32 uint32; + typedef unsigned __int16 uint16; + typedef unsigned __int8 uint8; +#else +# include <stdint.h> +# define INLINE inline + typedef uint64_t uint64; + typedef uint32_t uint32; + typedef uint16_t uint16; + typedef uint8_t uint8; +#endif + + +class SpookyHash +{ +public: + // + // SpookyHash: hash a single message in one call, produce 128-bit output + // + static void Hash128( + const void *message, // message to hash + size_t length, // length of message in bytes + uint64 *hash1, // in/out: in seed 1, out hash value 1 + uint64 *hash2); // in/out: in seed 2, out hash value 2 + + // + // Hash64: hash a single message in one call, return 64-bit output + // + static uint64 Hash64( + const void *message, // message to hash + size_t length, // length of message in bytes + uint64 seed) // seed + { + uint64 hash1 = seed; + Hash128(message, length, &hash1, &seed); + return hash1; + } + + // + // Hash32: hash a single message in one call, produce 32-bit output + // + static uint32 Hash32( + const void *message, // message to hash + size_t length, // length of message in bytes + uint32 seed) // seed + { + uint64 hash1 = seed, hash2 = seed; + Hash128(message, length, &hash1, &hash2); + return (uint32)hash1; + } + + // + // Init: initialize the context of a SpookyHash + // + void Init( + uint64 seed1, // any 64-bit value will do, including 0 + uint64 seed2); // different seeds produce independent hashes + + // + // Update: add a piece of a message to a SpookyHash state + // + void Update( + const void *message, // message fragment + size_t length); // length of message fragment in bytes + + + // + // Final: compute the hash for the current SpookyHash state + // + // This does not modify the state; you can keep updating it afterward + // + // The result is the same as if SpookyHash() had been called with + // all the pieces concatenated 
into one message. + // + void Final( + uint64 *hash1, // out only: first 64 bits of hash value. + uint64 *hash2); // out only: second 64 bits of hash value. + + // + // left rotate a 64-bit value by k bytes + // + static INLINE uint64 Rot64(uint64 x, int k) + { + return (x << k) | (x >> (64 - k)); + } + + // + // This is used if the input is 96 bytes long or longer. + // + // The internal state is fully overwritten every 96 bytes. + // Every input bit appears to cause at least 128 bits of entropy + // before 96 other bytes are combined, when run forward or backward + // For every input bit, + // Two inputs differing in just that input bit + // Where "differ" means xor or subtraction + // And the base value is random + // When run forward or backwards one Mix + // I tried 3 pairs of each; they all differed by at least 212 bits. + // + static INLINE void Mix( + const uint64 *data, + uint64 &s0, uint64 &s1, uint64 &s2, uint64 &s3, + uint64 &s4, uint64 &s5, uint64 &s6, uint64 &s7, + uint64 &s8, uint64 &s9, uint64 &s10,uint64 &s11) + { + s0 += data[0]; s2 ^= s10; s11 ^= s0; s0 = Rot64(s0,11); s11 += s1; + s1 += data[1]; s3 ^= s11; s0 ^= s1; s1 = Rot64(s1,32); s0 += s2; + s2 += data[2]; s4 ^= s0; s1 ^= s2; s2 = Rot64(s2,43); s1 += s3; + s3 += data[3]; s5 ^= s1; s2 ^= s3; s3 = Rot64(s3,31); s2 += s4; + s4 += data[4]; s6 ^= s2; s3 ^= s4; s4 = Rot64(s4,17); s3 += s5; + s5 += data[5]; s7 ^= s3; s4 ^= s5; s5 = Rot64(s5,28); s4 += s6; + s6 += data[6]; s8 ^= s4; s5 ^= s6; s6 = Rot64(s6,39); s5 += s7; + s7 += data[7]; s9 ^= s5; s6 ^= s7; s7 = Rot64(s7,57); s6 += s8; + s8 += data[8]; s10 ^= s6; s7 ^= s8; s8 = Rot64(s8,55); s7 += s9; + s9 += data[9]; s11 ^= s7; s8 ^= s9; s9 = Rot64(s9,54); s8 += s10; + s10 += data[10]; s0 ^= s8; s9 ^= s10; s10 = Rot64(s10,22); s9 += s11; + s11 += data[11]; s1 ^= s9; s10 ^= s11; s11 = Rot64(s11,46); s10 += s0; + } + + // + // Mix all 12 inputs together so that h0, h1 are a hash of them all. 
+ // + // For two inputs differing in just the input bits + // Where "differ" means xor or subtraction + // And the base value is random, or a counting value starting at that bit + // The final result will have each bit of h0, h1 flip + // For every input bit, + // with probability 50 +- .3% + // For every pair of input bits, + // with probability 50 +- 3% + // + // This does not rely on the last Mix() call having already mixed some. + // Two iterations was almost good enough for a 64-bit result, but a + // 128-bit result is reported, so End() does three iterations. + // + static INLINE void EndPartial( + uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3, + uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, + uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11) + { + h11+= h1; h2 ^= h11; h1 = Rot64(h1,44); + h0 += h2; h3 ^= h0; h2 = Rot64(h2,15); + h1 += h3; h4 ^= h1; h3 = Rot64(h3,34); + h2 += h4; h5 ^= h2; h4 = Rot64(h4,21); + h3 += h5; h6 ^= h3; h5 = Rot64(h5,38); + h4 += h6; h7 ^= h4; h6 = Rot64(h6,33); + h5 += h7; h8 ^= h5; h7 = Rot64(h7,10); + h6 += h8; h9 ^= h6; h8 = Rot64(h8,13); + h7 += h9; h10^= h7; h9 = Rot64(h9,38); + h8 += h10; h11^= h8; h10= Rot64(h10,53); + h9 += h11; h0 ^= h9; h11= Rot64(h11,42); + h10+= h0; h1 ^= h10; h0 = Rot64(h0,54); + } + + static INLINE void End( + uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3, + uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, + uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11) + { + EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + } + + // + // The goal is for each bit of the input to expand into 128 bits of + // apparent entropy before it is fully overwritten. 
+ // n trials both set and cleared at least m bits of h0 h1 h2 h3 + // n: 2 m: 29 + // n: 3 m: 46 + // n: 4 m: 57 + // n: 5 m: 107 + // n: 6 m: 146 + // n: 7 m: 152 + // when run forwards or backwards + // for all 1-bit and 2-bit diffs + // with diffs defined by either xor or subtraction + // with a base of all zeros plus a counter, or plus another bit, or random + // + static INLINE void ShortMix(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3) + { + h2 = Rot64(h2,50); h2 += h3; h0 ^= h2; + h3 = Rot64(h3,52); h3 += h0; h1 ^= h3; + h0 = Rot64(h0,30); h0 += h1; h2 ^= h0; + h1 = Rot64(h1,41); h1 += h2; h3 ^= h1; + h2 = Rot64(h2,54); h2 += h3; h0 ^= h2; + h3 = Rot64(h3,48); h3 += h0; h1 ^= h3; + h0 = Rot64(h0,38); h0 += h1; h2 ^= h0; + h1 = Rot64(h1,37); h1 += h2; h3 ^= h1; + h2 = Rot64(h2,62); h2 += h3; h0 ^= h2; + h3 = Rot64(h3,34); h3 += h0; h1 ^= h3; + h0 = Rot64(h0,5); h0 += h1; h2 ^= h0; + h1 = Rot64(h1,36); h1 += h2; h3 ^= h1; + } + + // + // Mix all 4 inputs together so that h0, h1 are a hash of them all. 
+ // + // For two inputs differing in just the input bits + // Where "differ" means xor or subtraction + // And the base value is random, or a counting value starting at that bit + // The final result will have each bit of h0, h1 flip + // For every input bit, + // with probability 50 +- .3% (it is probably better than that) + // For every pair of input bits, + // with probability 50 +- .75% (the worst case is approximately that) + // + static INLINE void ShortEnd(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3) + { + h3 ^= h2; h2 = Rot64(h2,15); h3 += h2; + h0 ^= h3; h3 = Rot64(h3,52); h0 += h3; + h1 ^= h0; h0 = Rot64(h0,26); h1 += h0; + h2 ^= h1; h1 = Rot64(h1,51); h2 += h1; + h3 ^= h2; h2 = Rot64(h2,28); h3 += h2; + h0 ^= h3; h3 = Rot64(h3,9); h0 += h3; + h1 ^= h0; h0 = Rot64(h0,47); h1 += h0; + h2 ^= h1; h1 = Rot64(h1,54); h2 += h1; + h3 ^= h2; h2 = Rot64(h2,32); h3 += h2; + h0 ^= h3; h3 = Rot64(h3,25); h0 += h3; + h1 ^= h0; h0 = Rot64(h0,63); h1 += h0; + } + +private: + + // + // Short is used for messages under 192 bytes in length + // Short has a low startup cost, the normal mode is good for long + // keys, the cost crossover is at about 192 bytes. The two modes were + // held to the same quality bar. 
+ // + static void Short( + const void *message, + size_t length, + uint64 *hash1, + uint64 *hash2); + + // number of uint64's in internal state + static const size_t sc_numVars = 12; + + // size of the internal state + static const size_t sc_blockSize = sc_numVars*8; + + // size of buffer of unhashed data, in bytes + static const size_t sc_bufSize = 2*sc_blockSize; + + // + // sc_const: a constant which: + // * is not zero + // * is odd + // * is a not-very-regular mix of 1's and 0's + // * does not need any other special mathematical properties + // + static const uint64 sc_const = 0xdeadbeefdeadbeefLL; + + uint64 m_data[2*sc_numVars]; // unhashed data, for partial messages + uint64 m_state[sc_numVars]; // internal state of the hash + size_t m_length; // total length of the input so far + uint8 m_remainder; // length of unhashed data stashed in m_data +}; + + + diff --git a/SpookyTest.cpp b/SpookyTest.cpp new file mode 100644 index 0000000..df9021e --- /dev/null +++ b/SpookyTest.cpp @@ -0,0 +1,16 @@ +#include "Spooky.h" + +void SpookyHash32_test(const void *key, int len, uint32_t seed, void *out) { + *(uint32_t*)out = SpookyHash::Hash32(key, len, seed); +} + +void SpookyHash64_test(const void *key, int len, uint32_t seed, void *out) { + *(uint64_t*)out = SpookyHash::Hash64(key, len, seed); +} + +void SpookyHash128_test(const void *key, int len, uint32_t seed, void *out) { + uint64_t h1 = seed, h2 = seed; + SpookyHash::Hash128(key, len, &h1, &h2); + ((uint64_t*)out)[0] = h1; + ((uint64_t*)out)[1] = h2; +} @@ -1,99 +1,99 @@ -#include "Stats.h"
-
-//-----------------------------------------------------------------------------
-
-double chooseK ( int n, int k )
-{
- if(k > (n - k)) k = n - k;
-
- double c = 1;
-
- for(int i = 0; i < k; i++)
- {
- double t = double(n-i) / double(i+1);
-
- c *= t;
- }
-
- return c;
-}
-
-double chooseUpToK ( int n, int k )
-{
- double c = 0;
-
- for(int i = 1; i <= k; i++)
- {
- c += chooseK(n,i);
- }
-
- return c;
-}
-
-//-----------------------------------------------------------------------------
-// Distribution "score"
-// TODO - big writeup of what this score means
-
-// Basically, we're computing a constant that says "The test distribution is as
-// uniform, RMS-wise, as a random distribution restricted to (1-X)*100 percent of
-// the bins. This makes for a nice uniform way to rate a distribution that isn't
-// dependent on the number of bins or the number of keys
-
-// (as long as # keys > # bins * 3 or so, otherwise random fluctuations show up
-// as distribution weaknesses)
-
-double calcScore ( const int * bins, const int bincount, const int keycount )
-{
- double n = bincount;
- double k = keycount;
-
- // compute rms value
-
- double r = 0;
-
- for(int i = 0; i < bincount; i++)
- {
- double b = bins[i];
-
- r += b*b;
- }
-
- r = sqrt(r / n);
-
- // compute fill factor
-
- double f = (k*k - 1) / (n*r*r - k);
-
- // rescale to (0,1) with 0 = good, 1 = bad
-
- return 1 - (f / n);
-}
-
-
-//----------------------------------------------------------------------------
-
-void plot ( double n )
-{
- double n2 = n * 1;
-
- if(n2 < 0) n2 = 0;
-
- n2 *= 100;
-
- if(n2 > 64) n2 = 64;
-
- int n3 = (int)n2;
-
- if(n3 == 0)
- printf(".");
- else
- {
- char x = '0' + char(n3);
-
- if(x > '9') x = 'X';
-
- printf("%c",x);
- }
-}
-
-//-----------------------------------------------------------------------------
+#include "Stats.h" + +//----------------------------------------------------------------------------- + +double chooseK ( int n, int k ) +{ + if(k > (n - k)) k = n - k; + + double c = 1; + + for(int i = 0; i < k; i++) + { + double t = double(n-i) / double(i+1); + + c *= t; + } + + return c; +} + +double chooseUpToK ( int n, int k ) +{ + double c = 0; + + for(int i = 1; i <= k; i++) + { + c += chooseK(n,i); + } + + return c; +} + +//----------------------------------------------------------------------------- +// Distribution "score" +// TODO - big writeup of what this score means + +// Basically, we're computing a constant that says "The test distribution is as +// uniform, RMS-wise, as a random distribution restricted to (1-X)*100 percent of +// the bins. This makes for a nice uniform way to rate a distribution that isn't +// dependent on the number of bins or the number of keys + +// (as long as # keys > # bins * 3 or so, otherwise random fluctuations show up +// as distribution weaknesses) + +double calcScore ( const int * bins, const int bincount, const int keycount ) +{ + double n = bincount; + double k = keycount; + + // compute rms value + + double r = 0; + + for(int i = 0; i < bincount; i++) + { + double b = bins[i]; + + r += b*b; + } + + r = sqrt(r / n); + + // compute fill factor + + double f = (k*k - 1) / (n*r*r - k); + + // rescale to (0,1) with 0 = good, 1 = bad + + return 1 - (f / n); +} + + +//---------------------------------------------------------------------------- + +void plot ( double n ) +{ + double n2 = n * 1; + + if(n2 < 0) n2 = 0; + + n2 *= 100; + + if(n2 > 64) n2 = 64; + + int n3 = (int)n2; + + if(n3 == 0) + printf("."); + else + { + char x = '0' + char(n3); + + if(x > '9') x = 'X'; + + printf("%c",x); + } +} + +//----------------------------------------------------------------------------- @@ -1,388 +1,388 @@ -#pragma once
-
-#include "Types.h"
-
-#include <math.h>
-#include <vector>
-#include <map>
-#include <algorithm> // for std::sort
-#include <string.h> // for memset
-#include <stdio.h> // for printf
-
-double calcScore ( const int * bins, const int bincount, const int ballcount );
-
-void plot ( double n );
-
-inline double ExpectedCollisions ( double balls, double bins )
-{
- return balls - bins + bins * pow(1 - 1/bins,balls);
-}
-
-double chooseK ( int b, int k );
-double chooseUpToK ( int n, int k );
-
-//-----------------------------------------------------------------------------
-
-inline uint32_t f3mix ( uint32_t k )
-{
- k ^= k >> 16;
- k *= 0x85ebca6b;
- k ^= k >> 13;
- k *= 0xc2b2ae35;
- k ^= k >> 16;
-
- return k;
-}
-
-//-----------------------------------------------------------------------------
-// Sort the hash list, count the total number of collisions and return
-// the first N collisions for further processing
-
-template< typename hashtype >
-int FindCollisions ( std::vector<hashtype> & hashes,
- HashSet<hashtype> & collisions,
- int maxCollisions )
-{
- int collcount = 0;
-
- std::sort(hashes.begin(),hashes.end());
-
- for(size_t i = 1; i < hashes.size(); i++)
- {
- if(hashes[i] == hashes[i-1])
- {
- collcount++;
-
- if((int)collisions.size() < maxCollisions)
- {
- collisions.insert(hashes[i]);
- }
- }
- }
-
- return collcount;
-}
-
-//-----------------------------------------------------------------------------
-
-template < class keytype, typename hashtype >
-int PrintCollisions ( hashfunc<hashtype> hash, std::vector<keytype> & keys )
-{
- int collcount = 0;
-
- typedef std::map<hashtype,keytype> htab;
- htab tab;
-
- for(size_t i = 1; i < keys.size(); i++)
- {
- keytype & k1 = keys[i];
-
- hashtype h = hash(&k1,sizeof(keytype),0);
-
- typename htab::iterator it = tab.find(h);
-
- if(it != tab.end())
- {
- keytype & k2 = (*it).second;
-
- printf("A: ");
- printbits(&k1,sizeof(keytype));
- printf("B: ");
- printbits(&k2,sizeof(keytype));
- }
- else
- {
- tab.insert( std::make_pair(h,k1) );
- }
- }
-
- return collcount;
-}
-
-//----------------------------------------------------------------------------
-// Measure the distribution "score" for each possible N-bit span up to 20 bits
-
-template< typename hashtype >
-double TestDistribution ( std::vector<hashtype> & hashes, bool drawDiagram )
-{
- printf("Testing distribution - ");
-
- if(drawDiagram) printf("\n");
-
- const int hashbits = sizeof(hashtype) * 8;
-
- int maxwidth = 20;
-
- // We need at least 5 keys per bin to reliably test distribution biases
- // down to 1%, so don't bother to test sparser distributions than that
-
- while(double(hashes.size()) / double(1 << maxwidth) < 5.0)
- {
- maxwidth--;
- }
-
- std::vector<int> bins;
- bins.resize(1 << maxwidth);
-
- double worst = 0;
- int worstStart = -1;
- int worstWidth = -1;
-
- for(int start = 0; start < hashbits; start++)
- {
- int width = maxwidth;
- int bincount = (1 << width);
-
- memset(&bins[0],0,sizeof(int)*bincount);
-
- for(size_t j = 0; j < hashes.size(); j++)
- {
- hashtype & hash = hashes[j];
-
- uint32_t index = window(&hash,sizeof(hash),start,width);
-
- bins[index]++;
- }
-
- // Test the distribution, then fold the bins in half,
- // repeat until we're down to 256 bins
-
- if(drawDiagram) printf("[");
-
- while(bincount >= 256)
- {
- double n = calcScore(&bins[0],bincount,(int)hashes.size());
-
- if(drawDiagram) plot(n);
-
- if(n > worst)
- {
- worst = n;
- worstStart = start;
- worstWidth = width;
- }
-
- width--;
- bincount /= 2;
-
- if(width < 8) break;
-
- for(int i = 0; i < bincount; i++)
- {
- bins[i] += bins[i+bincount];
- }
- }
-
- if(drawDiagram) printf("]\n");
- }
-
- double pct = worst * 100.0;
-
- printf("Worst bias is the %3d-bit window at bit %3d - %5.3f%%",worstWidth,worstStart,pct);
- if(pct >= 1.0) printf(" !!!!! ");
- printf("\n");
-
- return worst;
-}
-
-//----------------------------------------------------------------------------
-
-template < typename hashtype >
-bool TestHashList ( std::vector<hashtype> & hashes, std::vector<hashtype> & collisions, bool testDist, bool drawDiagram )
-{
- bool result = true;
-
- {
- size_t count = hashes.size();
-
- double expected = (double(count) * double(count-1)) / pow(2.0,double(sizeof(hashtype) * 8 + 1));
-
- printf("Testing collisions - Expected %8.2f, ",expected);
-
- double collcount = 0;
-
- HashSet<hashtype> collisions;
-
- collcount = FindCollisions(hashes,collisions,1000);
-
- printf("actual %8.2f (%5.2fx)",collcount, collcount / expected);
-
- if(sizeof(hashtype) == sizeof(uint32_t))
- {
- // 2x expected collisions = fail
-
- // #TODO - collision failure cutoff needs to be expressed as a standard deviation instead
- // of a scale factor, otherwise we fail erroneously if there are a small expected number
- // of collisions
-
- if(double(collcount) / double(expected) > 2.0)
- {
- printf(" !!!!! ");
- result = false;
- }
- }
- else
- {
- // For all hashes larger than 32 bits, _any_ collisions are a failure.
-
- if(collcount > 0)
- {
- printf(" !!!!! ");
- result = false;
- }
- }
-
- printf("\n");
- }
-
- //----------
-
- if(testDist)
- {
- TestDistribution(hashes,drawDiagram);
- }
-
- return result;
-}
-
-//----------
-
-template < typename hashtype >
-bool TestHashList ( std::vector<hashtype> & hashes, bool /*testColl*/, bool testDist, bool drawDiagram )
-{
- std::vector<hashtype> collisions;
-
- return TestHashList(hashes,collisions,testDist,drawDiagram);
-}
-
-//-----------------------------------------------------------------------------
-
-template < class keytype, typename hashtype >
-bool TestKeyList ( hashfunc<hashtype> hash, std::vector<keytype> & keys, bool testColl, bool testDist, bool drawDiagram )
-{
- int keycount = (int)keys.size();
-
- std::vector<hashtype> hashes;
-
- hashes.resize(keycount);
-
- printf("Hashing");
-
- for(int i = 0; i < keycount; i++)
- {
- if(i % (keycount / 10) == 0) printf(".");
-
- keytype & k = keys[i];
-
- hash(&k,sizeof(k),0,&hashes[i]);
- }
-
- printf("\n");
-
- bool result = TestHashList(hashes,testColl,testDist,drawDiagram);
-
- printf("\n");
-
- return result;
-}
-
-//-----------------------------------------------------------------------------
-// Bytepair test - generate 16-bit indices from all possible non-overlapping
-// 8-bit sections of the hash value, check distribution on all of them.
-
-// This is a very good test for catching weak intercorrelations between bits -
-// much harder to pass than the normal distribution test. However, it doesn't
-// really model the normal usage of hash functions in hash table lookup, so
-// I'm not sure it's that useful (and hash functions that fail this test but
-// pass the normal distribution test still work well in practice)
-
-template < typename hashtype >
-double TestDistributionBytepairs ( std::vector<hashtype> & hashes, bool drawDiagram )
-{
- const int nbytes = sizeof(hashtype);
- const int hashbits = nbytes * 8;
-
- const int nbins = 65536;
-
- std::vector<int> bins(nbins,0);
-
- double worst = 0;
-
- for(int a = 0; a < hashbits; a++)
- {
- if(drawDiagram) if((a % 8 == 0) && (a > 0)) printf("\n");
-
- if(drawDiagram) printf("[");
-
- for(int b = 0; b < hashbits; b++)
- {
- if(drawDiagram) if((b % 8 == 0) && (b > 0)) printf(" ");
-
- bins.clear();
- bins.resize(nbins,0);
-
- for(size_t i = 0; i < hashes.size(); i++)
- {
- hashtype & hash = hashes[i];
-
- uint32_t pa = window(&hash,sizeof(hash),a,8);
- uint32_t pb = window(&hash,sizeof(hash),b,8);
-
- bins[pa | (pb << 8)]++;
- }
-
- double s = calcScore(bins,bins.size(),hashes.size());
-
- if(drawDiagram) plot(s);
-
- if(s > worst)
- {
- worst = s;
- }
- }
-
- if(drawDiagram) printf("]\n");
- }
-
- return worst;
-}
-
-//-----------------------------------------------------------------------------
-// Simplified test - only check 64k distributions, and only on byte boundaries
-
-template < typename hashtype >
-void TestDistributionFast ( std::vector<hashtype> & hashes, double & dworst, double & davg )
-{
- const int hashbits = sizeof(hashtype) * 8;
- const int nbins = 65536;
-
- std::vector<int> bins(nbins,0);
-
- dworst = -1.0e90;
- davg = 0;
-
- for(int start = 0; start < hashbits; start += 8)
- {
- bins.clear();
- bins.resize(nbins,0);
-
- for(size_t j = 0; j < hashes.size(); j++)
- {
- hashtype & hash = hashes[j];
-
- uint32_t index = window(&hash,sizeof(hash),start,16);
-
- bins[index]++;
- }
-
- double n = calcScore(&bins.front(),(int)bins.size(),(int)hashes.size());
-
- davg += n;
-
- if(n > dworst) dworst = n;
- }
-
- davg /= double(hashbits/8);
-}
-
-//-----------------------------------------------------------------------------
+#pragma once + +#include "Types.h" + +#include <math.h> +#include <vector> +#include <map> +#include <algorithm> // for std::sort +#include <string.h> // for memset +#include <stdio.h> // for printf + +double calcScore ( const int * bins, const int bincount, const int ballcount ); + +void plot ( double n ); + +inline double ExpectedCollisions ( double balls, double bins ) +{ + return balls - bins + bins * pow(1 - 1/bins,balls); +} + +double chooseK ( int b, int k ); +double chooseUpToK ( int n, int k ); + +//----------------------------------------------------------------------------- + +inline uint32_t f3mix ( uint32_t k ) +{ + k ^= k >> 16; + k *= 0x85ebca6b; + k ^= k >> 13; + k *= 0xc2b2ae35; + k ^= k >> 16; + + return k; +} + +//----------------------------------------------------------------------------- +// Sort the hash list, count the total number of collisions and return +// the first N collisions for further processing + +template< typename hashtype > +int FindCollisions ( std::vector<hashtype> & hashes, + HashSet<hashtype> & collisions, + int maxCollisions ) +{ + int collcount = 0; + + std::sort(hashes.begin(),hashes.end()); + + for(size_t i = 1; i < hashes.size(); i++) + { + if(hashes[i] == hashes[i-1]) + { + collcount++; + + if((int)collisions.size() < maxCollisions) + { + collisions.insert(hashes[i]); + } + } + } + + return collcount; +} + +//----------------------------------------------------------------------------- + +template < class keytype, typename hashtype > +int PrintCollisions ( hashfunc<hashtype> hash, std::vector<keytype> & keys ) +{ + int collcount = 0; + + typedef std::map<hashtype,keytype> htab; + htab tab; + + for(size_t i = 1; i < keys.size(); i++) + { + keytype & k1 = keys[i]; + + hashtype h = hash(&k1,sizeof(keytype),0); + + typename htab::iterator it = tab.find(h); + + if(it != tab.end()) + { + keytype & k2 = (*it).second; + + printf("A: "); + printbits(&k1,sizeof(keytype)); + printf("B: "); + printbits(&k2,sizeof(keytype)); + } 
+ else + { + tab.insert( std::make_pair(h,k1) ); + } + } + + return collcount; +} + +//---------------------------------------------------------------------------- +// Measure the distribution "score" for each possible N-bit span up to 20 bits + +template< typename hashtype > +double TestDistribution ( std::vector<hashtype> & hashes, bool drawDiagram ) +{ + printf("Testing distribution - "); + + if(drawDiagram) printf("\n"); + + const int hashbits = sizeof(hashtype) * 8; + + int maxwidth = 20; + + // We need at least 5 keys per bin to reliably test distribution biases + // down to 1%, so don't bother to test sparser distributions than that + + while(double(hashes.size()) / double(1 << maxwidth) < 5.0) + { + maxwidth--; + } + + std::vector<int> bins; + bins.resize(1 << maxwidth); + + double worst = 0; + int worstStart = -1; + int worstWidth = -1; + + for(int start = 0; start < hashbits; start++) + { + int width = maxwidth; + int bincount = (1 << width); + + memset(&bins[0],0,sizeof(int)*bincount); + + for(size_t j = 0; j < hashes.size(); j++) + { + hashtype & hash = hashes[j]; + + uint32_t index = window(&hash,sizeof(hash),start,width); + + bins[index]++; + } + + // Test the distribution, then fold the bins in half, + // repeat until we're down to 256 bins + + if(drawDiagram) printf("["); + + while(bincount >= 256) + { + double n = calcScore(&bins[0],bincount,(int)hashes.size()); + + if(drawDiagram) plot(n); + + if(n > worst) + { + worst = n; + worstStart = start; + worstWidth = width; + } + + width--; + bincount /= 2; + + if(width < 8) break; + + for(int i = 0; i < bincount; i++) + { + bins[i] += bins[i+bincount]; + } + } + + if(drawDiagram) printf("]\n"); + } + + double pct = worst * 100.0; + + printf("Worst bias is the %3d-bit window at bit %3d - %5.3f%%",worstWidth,worstStart,pct); + if(pct >= 1.0) printf(" !!!!! 
"); + printf("\n"); + + return worst; +} + +//---------------------------------------------------------------------------- + +template < typename hashtype > +bool TestHashList ( std::vector<hashtype> & hashes, std::vector<hashtype> & collisions, bool testDist, bool drawDiagram ) +{ + bool result = true; + + { + size_t count = hashes.size(); + + double expected = (double(count) * double(count-1)) / pow(2.0,double(sizeof(hashtype) * 8 + 1)); + + printf("Testing collisions - Expected %8.2f, ",expected); + + double collcount = 0; + + HashSet<hashtype> collisions; + + collcount = FindCollisions(hashes,collisions,1000); + + printf("actual %8.2f (%5.2fx)",collcount, collcount / expected); + + if(sizeof(hashtype) == sizeof(uint32_t)) + { + // 2x expected collisions = fail + + // #TODO - collision failure cutoff needs to be expressed as a standard deviation instead + // of a scale factor, otherwise we fail erroneously if there are a small expected number + // of collisions + + if(double(collcount) / double(expected) > 2.0) + { + printf(" !!!!! "); + result = false; + } + } + else + { + // For all hashes larger than 32 bits, _any_ collisions are a failure. + + if(collcount > 0) + { + printf(" !!!!! 
"); + result = false; + } + } + + printf("\n"); + } + + //---------- + + if(testDist) + { + TestDistribution(hashes,drawDiagram); + } + + return result; +} + +//---------- + +template < typename hashtype > +bool TestHashList ( std::vector<hashtype> & hashes, bool /*testColl*/, bool testDist, bool drawDiagram ) +{ + std::vector<hashtype> collisions; + + return TestHashList(hashes,collisions,testDist,drawDiagram); +} + +//----------------------------------------------------------------------------- + +template < class keytype, typename hashtype > +bool TestKeyList ( hashfunc<hashtype> hash, std::vector<keytype> & keys, bool testColl, bool testDist, bool drawDiagram ) +{ + int keycount = (int)keys.size(); + + std::vector<hashtype> hashes; + + hashes.resize(keycount); + + printf("Hashing"); + + for(int i = 0; i < keycount; i++) + { + if(i % (keycount / 10) == 0) printf("."); + + keytype & k = keys[i]; + + hash(&k,sizeof(k),0,&hashes[i]); + } + + printf("\n"); + + bool result = TestHashList(hashes,testColl,testDist,drawDiagram); + + printf("\n"); + + return result; +} + +//----------------------------------------------------------------------------- +// Bytepair test - generate 16-bit indices from all possible non-overlapping +// 8-bit sections of the hash value, check distribution on all of them. + +// This is a very good test for catching weak intercorrelations between bits - +// much harder to pass than the normal distribution test. 
However, it doesn't +// really model the normal usage of hash functions in hash table lookup, so +// I'm not sure it's that useful (and hash functions that fail this test but +// pass the normal distribution test still work well in practice) + +template < typename hashtype > +double TestDistributionBytepairs ( std::vector<hashtype> & hashes, bool drawDiagram ) +{ + const int nbytes = sizeof(hashtype); + const int hashbits = nbytes * 8; + + const int nbins = 65536; + + std::vector<int> bins(nbins,0); + + double worst = 0; + + for(int a = 0; a < hashbits; a++) + { + if(drawDiagram) if((a % 8 == 0) && (a > 0)) printf("\n"); + + if(drawDiagram) printf("["); + + for(int b = 0; b < hashbits; b++) + { + if(drawDiagram) if((b % 8 == 0) && (b > 0)) printf(" "); + + bins.clear(); + bins.resize(nbins,0); + + for(size_t i = 0; i < hashes.size(); i++) + { + hashtype & hash = hashes[i]; + + uint32_t pa = window(&hash,sizeof(hash),a,8); + uint32_t pb = window(&hash,sizeof(hash),b,8); + + bins[pa | (pb << 8)]++; + } + + double s = calcScore(bins,bins.size(),hashes.size()); + + if(drawDiagram) plot(s); + + if(s > worst) + { + worst = s; + } + } + + if(drawDiagram) printf("]\n"); + } + + return worst; +} + +//----------------------------------------------------------------------------- +// Simplified test - only check 64k distributions, and only on byte boundaries + +template < typename hashtype > +void TestDistributionFast ( std::vector<hashtype> & hashes, double & dworst, double & davg ) +{ + const int hashbits = sizeof(hashtype) * 8; + const int nbins = 65536; + + std::vector<int> bins(nbins,0); + + dworst = -1.0e90; + davg = 0; + + for(int start = 0; start < hashbits; start += 8) + { + bins.clear(); + bins.resize(nbins,0); + + for(size_t j = 0; j < hashes.size(); j++) + { + hashtype & hash = hashes[j]; + + uint32_t index = window(&hash,sizeof(hash),start,16); + + bins[index]++; + } + + double n = calcScore(&bins.front(),(int)bins.size(),(int)hashes.size()); + + davg += n; + + 
if(n > dworst) dworst = n; + } + + davg /= double(hashbits/8); +} + +//----------------------------------------------------------------------------- diff --git a/SuperFastHash.cpp b/SuperFastHash.cpp index 38d030d..1f6d39a 100644 --- a/SuperFastHash.cpp +++ b/SuperFastHash.cpp @@ -1,76 +1,76 @@ -#include "Platform.h"
-#include <stdio.h> // for NULL
-
-/* By Paul Hsieh (C) 2004, 2005. Covered under the Paul Hsieh derivative
- license. See:
- http://www.azillionmonkeys.com/qed/weblicense.html for license details.
-
- http://www.azillionmonkeys.com/qed/hash.html */
-
-/*
-#undef get16bits
-#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
- || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
-#define get16bits(d) (*((const uint16_t *) (d)))
-#endif
-
-#if !defined (get16bits)
-#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
- +(uint32_t)(((const uint8_t *)(d))[0]) )
-#endif
-*/
-
-FORCE_INLINE uint16_t get16bits ( const void * p )
-{
- return *(const uint16_t*)p;
-}
-
-uint32_t SuperFastHash (const signed char * data, int len) {
-uint32_t hash = 0, tmp;
-int rem;
-
- if (len <= 0 || data == NULL) return 0;
-
- rem = len & 3;
- len >>= 2;
-
- /* Main loop */
- for (;len > 0; len--) {
- hash += get16bits (data);
- tmp = (get16bits (data+2) << 11) ^ hash;
- hash = (hash << 16) ^ tmp;
- data += 2*sizeof (uint16_t);
- hash += hash >> 11;
- }
-
- /* Handle end cases */
- switch (rem) {
- case 3: hash += get16bits (data);
- hash ^= hash << 16;
- hash ^= data[sizeof (uint16_t)] << 18;
- hash += hash >> 11;
- break;
- case 2: hash += get16bits (data);
- hash ^= hash << 11;
- hash += hash >> 17;
- break;
- case 1: hash += *data;
- hash ^= hash << 10;
- hash += hash >> 1;
- }
-
- /* Force "avalanching" of final 127 bits */
- hash ^= hash << 3;
- hash += hash >> 5;
- hash ^= hash << 4;
- hash += hash >> 17;
- hash ^= hash << 25;
- hash += hash >> 6;
-
- return hash;
-}
-
-void SuperFastHash ( const void * key, int len, uint32_t /*seed*/, void * out )
-{
- *(uint32_t*)out = SuperFastHash((const signed char*)key,len);
-}
+#include "Platform.h" +#include <stdio.h> // for NULL + +/* By Paul Hsieh (C) 2004, 2005. Covered under the Paul Hsieh derivative + license. See: + http://www.azillionmonkeys.com/qed/weblicense.html for license details. + + http://www.azillionmonkeys.com/qed/hash.html */ + +/* +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ + || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) +#define get16bits(d) (*((const uint16_t *) (d))) +#endif + +#if !defined (get16bits) +#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\ + +(uint32_t)(((const uint8_t *)(d))[0]) ) +#endif +*/ + +FORCE_INLINE uint16_t get16bits ( const void * p ) +{ + return *(const uint16_t*)p; +} + +uint32_t SuperFastHash (const signed char * data, int len) { +uint32_t hash = 0, tmp; +int rem; + + if (len <= 0 || data == NULL) return 0; + + rem = len & 3; + len >>= 2; + + /* Main loop */ + for (;len > 0; len--) { + hash += get16bits (data); + tmp = (get16bits (data+2) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + data += 2*sizeof (uint16_t); + hash += hash >> 11; + } + + /* Handle end cases */ + switch (rem) { + case 3: hash += get16bits (data); + hash ^= hash << 16; + hash ^= data[sizeof (uint16_t)] << 18; + hash += hash >> 11; + break; + case 2: hash += get16bits (data); + hash ^= hash << 11; + hash += hash >> 17; + break; + case 1: hash += *data; + hash ^= hash << 10; + hash += hash >> 1; + } + + /* Force "avalanching" of final 127 bits */ + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + + return hash; +} + +void SuperFastHash ( const void * key, int len, uint32_t /*seed*/, void * out ) +{ + *(uint32_t*)out = SuperFastHash((const signed char*)key,len); +} @@ -1,148 +1,148 @@ -#include "Types.h"
-
-#include "Random.h"
-
-#include <stdio.h>
-
-uint32_t MurmurOAAT ( const void * blob, int len, uint32_t seed );
-
-//-----------------------------------------------------------------------------
-
-#if defined(_MSC_VER)
-#pragma optimize( "", off )
-#endif
-
-void blackhole ( uint32_t )
-{
-}
-
-uint32_t whitehole ( void )
-{
- return 0;
-}
-
-#if defined(_MSC_VER)
-#pragma optimize( "", on )
-#endif
-
-uint32_t g_verify = 1;
-
-void MixVCode ( const void * blob, int len )
-{
- g_verify = MurmurOAAT(blob,len,g_verify);
-}
-
-//-----------------------------------------------------------------------------
-
-bool isprime ( uint32_t x )
-{
- uint32_t p[] =
- {
- 2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,
- 103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,
- 199,211,223,227,229,233,239,241,251
- };
-
- for(size_t i=0; i < sizeof(p)/sizeof(uint32_t); i++)
- {
- if((x % p[i]) == 0)
- {
- return false;
- }
- }
-
- for(int i = 257; i < 65536; i += 2)
- {
- if((x % i) == 0)
- {
- return false;
- }
- }
-
- return true;
-}
-
-void GenerateMixingConstants ( void )
-{
- Rand r(8350147);
-
- int count = 0;
-
- int trials = 0;
- int bitfail = 0;
- int popfail = 0;
- int matchfail = 0;
- int primefail = 0;
-
- //for(uint32_t x = 1; x; x++)
- while(count < 100)
- {
- //if(x % 100000000 == 0) printf(".");
-
- trials++;
- uint32_t b = r.rand_u32();
- //uint32_t b = x;
-
- //----------
- // must have between 14 and 18 set bits
-
- if(popcount(b) < 16) { b = 0; popfail++; }
- if(popcount(b) > 16) { b = 0; popfail++; }
-
- if(b == 0) continue;
-
- //----------
- // must have 3-5 bits set per 8-bit window
-
- for(int i = 0; i < 32; i++)
- {
- uint32_t c = ROTL32(b,i) & 0xFF;
-
- if(popcount(c) < 3) { b = 0; bitfail++; break; }
- if(popcount(c) > 5) { b = 0; bitfail++; break; }
- }
-
- if(b == 0) continue;
-
- //----------
- // all 8-bit windows must be different
-
- uint8_t match[256];
-
- memset(match,0,256);
-
- for(int i = 0; i < 32; i++)
- {
- uint32_t c = ROTL32(b,i) & 0xFF;
-
- if(match[c]) { b = 0; matchfail++; break; }
-
- match[c] = 1;
- }
-
- if(b == 0) continue;
-
- //----------
- // must be prime
-
- if(!isprime(b))
- {
- b = 0;
- primefail++;
- }
-
- if(b == 0) continue;
-
- //----------
-
- if(b)
- {
- printf("0x%08x : 0x%08x\n",b,~b);
- count++;
- }
- }
-
- printf("%d %d %d %d %d %d\n",trials,popfail,bitfail,matchfail,primefail,count);
-}
-
-//-----------------------------------------------------------------------------
+#include "Types.h" + +#include "Random.h" + +#include <stdio.h> + +uint32_t MurmurOAAT ( const void * blob, int len, uint32_t seed ); + +//----------------------------------------------------------------------------- + +#if defined(_MSC_VER) +#pragma optimize( "", off ) +#endif + +void blackhole ( uint32_t ) +{ +} + +uint32_t whitehole ( void ) +{ + return 0; +} + +#if defined(_MSC_VER) +#pragma optimize( "", on ) +#endif + +uint32_t g_verify = 1; + +void MixVCode ( const void * blob, int len ) +{ + g_verify = MurmurOAAT(blob,len,g_verify); +} + +//----------------------------------------------------------------------------- + +bool isprime ( uint32_t x ) +{ + uint32_t p[] = + { + 2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101, + 103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197, + 199,211,223,227,229,233,239,241,251 + }; + + for(size_t i=0; i < sizeof(p)/sizeof(uint32_t); i++) + { + if((x % p[i]) == 0) + { + return false; + } + } + + for(int i = 257; i < 65536; i += 2) + { + if((x % i) == 0) + { + return false; + } + } + + return true; +} + +void GenerateMixingConstants ( void ) +{ + Rand r(8350147); + + int count = 0; + + int trials = 0; + int bitfail = 0; + int popfail = 0; + int matchfail = 0; + int primefail = 0; + + //for(uint32_t x = 1; x; x++) + while(count < 100) + { + //if(x % 100000000 == 0) printf("."); + + trials++; + uint32_t b = r.rand_u32(); + //uint32_t b = x; + + //---------- + // must have between 14 and 18 set bits + + if(popcount(b) < 16) { b = 0; popfail++; } + if(popcount(b) > 16) { b = 0; popfail++; } + + if(b == 0) continue; + + //---------- + // must have 3-5 bits set per 8-bit window + + for(int i = 0; i < 32; i++) + { + uint32_t c = ROTL32(b,i) & 0xFF; + + if(popcount(c) < 3) { b = 0; bitfail++; break; } + if(popcount(c) > 5) { b = 0; bitfail++; break; } + } + + if(b == 0) continue; + + //---------- + // all 8-bit windows must be different + + uint8_t match[256]; + + 
memset(match,0,256); + + for(int i = 0; i < 32; i++) + { + uint32_t c = ROTL32(b,i) & 0xFF; + + if(match[c]) { b = 0; matchfail++; break; } + + match[c] = 1; + } + + if(b == 0) continue; + + //---------- + // must be prime + + if(!isprime(b)) + { + b = 0; + primefail++; + } + + if(b == 0) continue; + + //---------- + + if(b) + { + printf("0x%08x : 0x%08x\n",b,~b); + count++; + } + } + + printf("%d %d %d %d %d %d\n",trials,popfail,bitfail,matchfail,primefail,count); +} + +//----------------------------------------------------------------------------- @@ -1,374 +1,374 @@ -#pragma once
-
-#include "Platform.h"
-#include "Bitvec.h"
-
-#include <memory.h>
-#include <vector>
-#include <map>
-#include <set>
-
-//-----------------------------------------------------------------------------
-// If the optimizer detects that a value in a speed test is constant or unused,
-// the optimizer may remove references to it or otherwise create code that
-// would not occur in a real-world application. To prevent the optimizer from
-// doing this we declare two trivial functions that either sink or source data,
-// and bar the compiler from optimizing them.
-
-void blackhole ( uint32_t x );
-uint32_t whitehole ( void );
-
-//-----------------------------------------------------------------------------
-// We want to verify that every test produces the same result on every platform
-// To do this, we hash the results of every test to produce an overall
-// verification value for the whole test suite. If two runs produce the same
-// verification value, then every test in both run produced the same results
-
-extern uint32_t g_verify;
-
-// Mix the given blob of data into the verification code
-
-void MixVCode ( const void * blob, int len );
-
-
-//-----------------------------------------------------------------------------
-
-typedef void (*pfHash) ( const void * blob, const int len, const uint32_t seed, void * out );
-
-struct ByteVec : public std::vector<uint8_t>
-{
- ByteVec ( const void * key, int len )
- {
- resize(len);
- memcpy(&front(),key,len);
- }
-};
-
-template< typename hashtype, typename keytype >
-struct CollisionMap : public std::map< hashtype, std::vector<keytype> >
-{
-};
-
-template< typename hashtype >
-struct HashSet : public std::set<hashtype>
-{
-};
-
-//-----------------------------------------------------------------------------
-
-template < class T >
-class hashfunc
-{
-public:
-
- hashfunc ( pfHash h ) : m_hash(h)
- {
- }
-
- inline void operator () ( const void * key, const int len, const uint32_t seed, uint32_t * out )
- {
- m_hash(key,len,seed,out);
- }
-
- inline operator pfHash ( void ) const
- {
- return m_hash;
- }
-
- inline T operator () ( const void * key, const int len, const uint32_t seed )
- {
- T result;
-
- m_hash(key,len,seed,(uint32_t*)&result);
-
- return result;
- }
-
- pfHash m_hash;
-};
-
-//-----------------------------------------------------------------------------
-// Key-processing callback objects. Simplifies keyset testing a bit.
-
-struct KeyCallback
-{
- KeyCallback() : m_count(0)
- {
- }
-
- virtual ~KeyCallback()
- {
- }
-
- virtual void operator() ( const void * key, int len )
- {
- m_count++;
- }
-
- virtual void reserve ( int keycount )
- {
- };
-
- int m_count;
-};
-
-//----------
-
-template<typename hashtype>
-struct HashCallback : public KeyCallback
-{
- typedef std::vector<hashtype> hashvec;
-
- HashCallback ( pfHash hash, hashvec & hashes ) : m_hashes(hashes), m_pfHash(hash)
- {
- m_hashes.clear();
- }
-
- virtual void operator () ( const void * key, int len )
- {
- size_t newsize = m_hashes.size() + 1;
-
- m_hashes.resize(newsize);
-
- m_pfHash(key,len,0,&m_hashes.back());
- }
-
- virtual void reserve ( int keycount )
- {
- m_hashes.reserve(keycount);
- }
-
- hashvec & m_hashes;
- pfHash m_pfHash;
-
- //----------
-
-private:
-
- HashCallback & operator = ( const HashCallback & );
-};
-
-//----------
-
-template<typename hashtype>
-struct CollisionCallback : public KeyCallback
-{
- typedef HashSet<hashtype> hashset;
- typedef CollisionMap<hashtype,ByteVec> collmap;
-
- CollisionCallback ( pfHash hash, hashset & collisions, collmap & cmap )
- : m_pfHash(hash),
- m_collisions(collisions),
- m_collmap(cmap)
- {
- }
-
- virtual void operator () ( const void * key, int len )
- {
- hashtype h;
-
- m_pfHash(key,len,0,&h);
-
- if(m_collisions.count(h))
- {
- m_collmap[h].push_back( ByteVec(key,len) );
- }
- }
-
- //----------
-
- pfHash m_pfHash;
- hashset & m_collisions;
- collmap & m_collmap;
-
-private:
-
- CollisionCallback & operator = ( const CollisionCallback & c );
-};
-
-//-----------------------------------------------------------------------------
-
-template < int _bits >
-class Blob
-{
-public:
-
- Blob()
- {
- for(size_t i = 0; i < sizeof(bytes); i++)
- {
- bytes[i] = 0;
- }
- }
-
- Blob ( int x )
- {
- for(size_t i = 0; i < sizeof(bytes); i++)
- {
- bytes[i] = 0;
- }
-
- *(int*)bytes = x;
- }
-
- Blob ( const Blob & k )
- {
- for(size_t i = 0; i < sizeof(bytes); i++)
- {
- bytes[i] = k.bytes[i];
- }
- }
-
- Blob & operator = ( const Blob & k )
- {
- for(size_t i = 0; i < sizeof(bytes); i++)
- {
- bytes[i] = k.bytes[i];
- }
-
- return *this;
- }
-
- Blob ( uint64_t a, uint64_t b )
- {
- uint64_t t[2] = {a,b};
- set(&t,16);
- }
-
- void set ( const void * blob, size_t len )
- {
- const uint8_t * k = (const uint8_t*)blob;
-
- len = len > sizeof(bytes) ? sizeof(bytes) : len;
-
- for(size_t i = 0; i < len; i++)
- {
- bytes[i] = k[i];
- }
-
- for(size_t i = len; i < sizeof(bytes); i++)
- {
- bytes[i] = 0;
- }
- }
-
- uint8_t & operator [] ( int i )
- {
- return bytes[i];
- }
-
- const uint8_t & operator [] ( int i ) const
- {
- return bytes[i];
- }
-
- //----------
- // boolean operations
-
- bool operator < ( const Blob & k ) const
- {
- for(size_t i = 0; i < sizeof(bytes); i++)
- {
- if(bytes[i] < k.bytes[i]) return true;
- if(bytes[i] > k.bytes[i]) return false;
- }
-
- return false;
- }
-
- bool operator == ( const Blob & k ) const
- {
- for(size_t i = 0; i < sizeof(bytes); i++)
- {
- if(bytes[i] != k.bytes[i]) return false;
- }
-
- return true;
- }
-
- bool operator != ( const Blob & k ) const
- {
- return !(*this == k);
- }
-
- //----------
- // bitwise operations
-
- Blob operator ^ ( const Blob & k ) const
- {
- Blob t;
-
- for(size_t i = 0; i < sizeof(bytes); i++)
- {
- t.bytes[i] = bytes[i] ^ k.bytes[i];
- }
-
- return t;
- }
-
- Blob & operator ^= ( const Blob & k )
- {
- for(size_t i = 0; i < sizeof(bytes); i++)
- {
- bytes[i] ^= k.bytes[i];
- }
-
- return *this;
- }
-
- int operator & ( int x )
- {
- return (*(int*)bytes) & x;
- }
-
- Blob & operator &= ( const Blob & k )
- {
- for(size_t i = 0; i < sizeof(bytes); i++)
- {
- bytes[i] &= k.bytes[i];
- }
- }
-
- Blob operator << ( int c )
- {
- Blob t = *this;
-
- lshift(&t.bytes[0],sizeof(bytes),c);
-
- return t;
- }
-
- Blob operator >> ( int c )
- {
- Blob t = *this;
-
- rshift(&t.bytes[0],sizeof(bytes),c);
-
- return t;
- }
-
- Blob & operator <<= ( int c )
- {
- lshift(&bytes[0],sizeof(bytes),c);
-
- return *this;
- }
-
- Blob & operator >>= ( int c )
- {
- rshift(&bytes[0],sizeof(bytes),c);
-
- return *this;
- }
-
- //----------
-
-private:
-
- uint8_t bytes[(_bits+7)/8];
-};
-
-typedef Blob<128> uint128_t;
-typedef Blob<256> uint256_t;
-
-//-----------------------------------------------------------------------------
+#pragma once + +#include "Platform.h" +#include "Bitvec.h" + +#include <memory.h> +#include <vector> +#include <map> +#include <set> + +//----------------------------------------------------------------------------- +// If the optimizer detects that a value in a speed test is constant or unused, +// the optimizer may remove references to it or otherwise create code that +// would not occur in a real-world application. To prevent the optimizer from +// doing this we declare two trivial functions that either sink or source data, +// and bar the compiler from optimizing them. + +void blackhole ( uint32_t x ); +uint32_t whitehole ( void ); + +//----------------------------------------------------------------------------- +// We want to verify that every test produces the same result on every platform +// To do this, we hash the results of every test to produce an overall +// verification value for the whole test suite. If two runs produce the same +// verification value, then every test in both run produced the same results + +extern uint32_t g_verify; + +// Mix the given blob of data into the verification code + +void MixVCode ( const void * blob, int len ); + + +//----------------------------------------------------------------------------- + +typedef void (*pfHash) ( const void * blob, const int len, const uint32_t seed, void * out ); + +struct ByteVec : public std::vector<uint8_t> +{ + ByteVec ( const void * key, int len ) + { + resize(len); + memcpy(&front(),key,len); + } +}; + +template< typename hashtype, typename keytype > +struct CollisionMap : public std::map< hashtype, std::vector<keytype> > +{ +}; + +template< typename hashtype > +struct HashSet : public std::set<hashtype> +{ +}; + +//----------------------------------------------------------------------------- + +template < class T > +class hashfunc +{ +public: + + hashfunc ( pfHash h ) : m_hash(h) + { + } + + inline void operator () ( const void * key, const int len, const uint32_t seed, uint32_t * out ) 
+ { + m_hash(key,len,seed,out); + } + + inline operator pfHash ( void ) const + { + return m_hash; + } + + inline T operator () ( const void * key, const int len, const uint32_t seed ) + { + T result; + + m_hash(key,len,seed,(uint32_t*)&result); + + return result; + } + + pfHash m_hash; +}; + +//----------------------------------------------------------------------------- +// Key-processing callback objects. Simplifies keyset testing a bit. + +struct KeyCallback +{ + KeyCallback() : m_count(0) + { + } + + virtual ~KeyCallback() + { + } + + virtual void operator() ( const void * key, int len ) + { + m_count++; + } + + virtual void reserve ( int keycount ) + { + }; + + int m_count; +}; + +//---------- + +template<typename hashtype> +struct HashCallback : public KeyCallback +{ + typedef std::vector<hashtype> hashvec; + + HashCallback ( pfHash hash, hashvec & hashes ) : m_hashes(hashes), m_pfHash(hash) + { + m_hashes.clear(); + } + + virtual void operator () ( const void * key, int len ) + { + size_t newsize = m_hashes.size() + 1; + + m_hashes.resize(newsize); + + m_pfHash(key,len,0,&m_hashes.back()); + } + + virtual void reserve ( int keycount ) + { + m_hashes.reserve(keycount); + } + + hashvec & m_hashes; + pfHash m_pfHash; + + //---------- + +private: + + HashCallback & operator = ( const HashCallback & ); +}; + +//---------- + +template<typename hashtype> +struct CollisionCallback : public KeyCallback +{ + typedef HashSet<hashtype> hashset; + typedef CollisionMap<hashtype,ByteVec> collmap; + + CollisionCallback ( pfHash hash, hashset & collisions, collmap & cmap ) + : m_pfHash(hash), + m_collisions(collisions), + m_collmap(cmap) + { + } + + virtual void operator () ( const void * key, int len ) + { + hashtype h; + + m_pfHash(key,len,0,&h); + + if(m_collisions.count(h)) + { + m_collmap[h].push_back( ByteVec(key,len) ); + } + } + + //---------- + + pfHash m_pfHash; + hashset & m_collisions; + collmap & m_collmap; + +private: + + CollisionCallback & operator = ( const 
CollisionCallback & c ); +}; + +//----------------------------------------------------------------------------- + +template < int _bits > +class Blob +{ +public: + + Blob() + { + for(size_t i = 0; i < sizeof(bytes); i++) + { + bytes[i] = 0; + } + } + + Blob ( int x ) + { + for(size_t i = 0; i < sizeof(bytes); i++) + { + bytes[i] = 0; + } + + *(int*)bytes = x; + } + + Blob ( const Blob & k ) + { + for(size_t i = 0; i < sizeof(bytes); i++) + { + bytes[i] = k.bytes[i]; + } + } + + Blob & operator = ( const Blob & k ) + { + for(size_t i = 0; i < sizeof(bytes); i++) + { + bytes[i] = k.bytes[i]; + } + + return *this; + } + + Blob ( uint64_t a, uint64_t b ) + { + uint64_t t[2] = {a,b}; + set(&t,16); + } + + void set ( const void * blob, size_t len ) + { + const uint8_t * k = (const uint8_t*)blob; + + len = len > sizeof(bytes) ? sizeof(bytes) : len; + + for(size_t i = 0; i < len; i++) + { + bytes[i] = k[i]; + } + + for(size_t i = len; i < sizeof(bytes); i++) + { + bytes[i] = 0; + } + } + + uint8_t & operator [] ( int i ) + { + return bytes[i]; + } + + const uint8_t & operator [] ( int i ) const + { + return bytes[i]; + } + + //---------- + // boolean operations + + bool operator < ( const Blob & k ) const + { + for(size_t i = 0; i < sizeof(bytes); i++) + { + if(bytes[i] < k.bytes[i]) return true; + if(bytes[i] > k.bytes[i]) return false; + } + + return false; + } + + bool operator == ( const Blob & k ) const + { + for(size_t i = 0; i < sizeof(bytes); i++) + { + if(bytes[i] != k.bytes[i]) return false; + } + + return true; + } + + bool operator != ( const Blob & k ) const + { + return !(*this == k); + } + + //---------- + // bitwise operations + + Blob operator ^ ( const Blob & k ) const + { + Blob t; + + for(size_t i = 0; i < sizeof(bytes); i++) + { + t.bytes[i] = bytes[i] ^ k.bytes[i]; + } + + return t; + } + + Blob & operator ^= ( const Blob & k ) + { + for(size_t i = 0; i < sizeof(bytes); i++) + { + bytes[i] ^= k.bytes[i]; + } + + return *this; + } + + int operator & ( 
int x ) + { + return (*(int*)bytes) & x; + } + + Blob & operator &= ( const Blob & k ) + { + for(size_t i = 0; i < sizeof(bytes); i++) + { + bytes[i] &= k.bytes[i]; + } + } + + Blob operator << ( int c ) + { + Blob t = *this; + + lshift(&t.bytes[0],sizeof(bytes),c); + + return t; + } + + Blob operator >> ( int c ) + { + Blob t = *this; + + rshift(&t.bytes[0],sizeof(bytes),c); + + return t; + } + + Blob & operator <<= ( int c ) + { + lshift(&bytes[0],sizeof(bytes),c); + + return *this; + } + + Blob & operator >>= ( int c ) + { + rshift(&bytes[0],sizeof(bytes),c); + + return *this; + } + + //---------- + +private: + + uint8_t bytes[(_bits+7)/8]; +}; + +typedef Blob<128> uint128_t; +typedef Blob<256> uint256_t; + +//----------------------------------------------------------------------------- @@ -1,100 +1,100 @@ -#include "Platform.h"
-
-/*
- * This file is derived from crc32.c from the zlib-1.1.3 distribution
- * by Jean-loup Gailly and Mark Adler.
- */
-
-/* crc32.c -- compute the CRC-32 of a data stream
- * Copyright (C) 1995-1998 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-
-/* ========================================================================
- * Table of CRC-32's of all single-byte values (made by make_crc_table)
- */
-static const uint32_t crc_table[256] = {
- 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
- 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
- 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
- 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
- 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
- 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
- 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
- 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
- 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
- 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
- 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
- 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
- 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
- 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
- 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
- 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
- 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
- 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
- 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
- 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
- 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
- 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
- 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
- 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
- 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
- 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
- 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
- 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
- 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
- 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
- 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
- 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
- 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
- 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
- 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
- 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
- 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
- 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
- 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
- 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
- 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
- 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
- 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
- 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
- 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
- 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
- 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
- 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
- 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
- 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
- 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
- 0x2d02ef8dL
-};
-
-/* ========================================================================= */
-
-#define DO1(buf) crc = crc_table[((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8);
-#define DO2(buf) DO1(buf); DO1(buf);
-#define DO4(buf) DO2(buf); DO2(buf);
-#define DO8(buf) DO4(buf); DO4(buf);
-
-/* ========================================================================= */
-
-void crc32 ( const void * key, int len, uint32_t seed, void * out )
-{
- uint8_t * buf = (uint8_t*)key;
- uint32_t crc = seed ^ 0xffffffffL;
-
- while (len >= 8)
- {
- DO8(buf);
- len -= 8;
- }
-
- while(len--)
- {
- DO1(buf);
- }
-
- crc ^= 0xffffffffL;
-
- *(uint32_t*)out = crc;
-}
+#include "Platform.h" + +/* + * This file is derived from crc32.c from the zlib-1.1.3 distribution + * by Jean-loup Gailly and Mark Adler. + */ + +/* crc32.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + + +/* ======================================================================== + * Table of CRC-32's of all single-byte values (made by make_crc_table) + */ +static const uint32_t crc_table[256] = { + 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, + 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, + 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, + 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, + 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, + 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, + 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, + 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, + 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, + 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, + 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, + 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, + 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, + 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, + 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, + 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, + 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, + 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, + 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, + 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, + 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, + 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, + 
0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, + 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, + 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, + 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, + 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, + 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, + 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, + 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, + 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, + 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, + 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, + 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, + 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, + 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, + 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, + 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, + 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, + 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, + 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, + 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, + 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, + 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, + 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, + 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, + 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, + 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, + 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, + 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, + 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, + 0x2d02ef8dL +}; + +/* 
========================================================================= */ + +#define DO1(buf) crc = crc_table[((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8); +#define DO2(buf) DO1(buf); DO1(buf); +#define DO4(buf) DO2(buf); DO2(buf); +#define DO8(buf) DO4(buf); DO4(buf); + +/* ========================================================================= */ + +void crc32 ( const void * key, int len, uint32_t seed, void * out ) +{ + uint8_t * buf = (uint8_t*)key; + uint32_t crc = seed ^ 0xffffffffL; + + while (len >= 8) + { + DO8(buf); + len -= 8; + } + + while(len--) + { + DO1(buf); + } + + crc ^= 0xffffffffL; + + *(uint32_t*)out = crc; +} diff --git a/lookup3.cpp b/lookup3.cpp index 60087f1..63f00f8 100644 --- a/lookup3.cpp +++ b/lookup3.cpp @@ -1,72 +1,72 @@ -// lookup3 by Bob Jekins, code is public domain.
-
-#include "Platform.h"
-
-#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
-
-#define mix(a,b,c) \
-{ \
- a -= c; a ^= rot(c, 4); c += b; \
- b -= a; b ^= rot(a, 6); a += c; \
- c -= b; c ^= rot(b, 8); b += a; \
- a -= c; a ^= rot(c,16); c += b; \
- b -= a; b ^= rot(a,19); a += c; \
- c -= b; c ^= rot(b, 4); b += a; \
-}
-
-#define final(a,b,c) \
-{ \
- c ^= b; c -= rot(b,14); \
- a ^= c; a -= rot(c,11); \
- b ^= a; b -= rot(a,25); \
- c ^= b; c -= rot(b,16); \
- a ^= c; a -= rot(c,4); \
- b ^= a; b -= rot(a,14); \
- c ^= b; c -= rot(b,24); \
-}
-
-uint32_t lookup3 ( const void * key, int length, uint32_t initval )
-{
- uint32_t a,b,c; /* internal state */
-
- a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
-
- const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
-
- /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
- while (length > 12)
- {
- a += k[0];
- b += k[1];
- c += k[2];
- mix(a,b,c);
- length -= 12;
- k += 3;
- }
-
- switch(length)
- {
- case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
- case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
- case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
- case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
- case 8 : b+=k[1]; a+=k[0]; break;
- case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
- case 6 : b+=k[1]&0xffff; a+=k[0]; break;
- case 5 : b+=k[1]&0xff; a+=k[0]; break;
- case 4 : a+=k[0]; break;
- case 3 : a+=k[0]&0xffffff; break;
- case 2 : a+=k[0]&0xffff; break;
- case 1 : a+=k[0]&0xff; break;
- case 0 : { return c; } /* zero length strings require no mixing */
- }
-
- final(a,b,c);
-
- return c;
-}
-
-void lookup3_test ( const void * key, int len, uint32_t seed, void * out )
-{
- *(uint32_t*)out = lookup3(key,len,seed);
-}
+// lookup3 by Bob Jekins, code is public domain. + +#include "Platform.h" + +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c,4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +uint32_t lookup3 ( const void * key, int length, uint32_t initval ) +{ + uint32_t a,b,c; /* internal state */ + + a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; + + const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff; a+=k[0]; break; + case 6 : b+=k[1]&0xffff; a+=k[0]; break; + case 5 : b+=k[1]&0xff; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff; break; + case 2 : a+=k[0]&0xffff; break; + case 1 : a+=k[0]&0xff; break; + case 0 : { return c; } /* zero length strings require no mixing */ + } + + final(a,b,c); + + return c; +} + +void lookup3_test ( const void * key, int len, uint32_t seed, void * out ) +{ + *(uint32_t*)out = lookup3(key,len,seed); +} @@ -1,590 +1,595 @@ -#include "Platform.h"
-#include "Hashes.h"
-#include "KeysetTest.h"
-#include "SpeedTest.h"
-#include "AvalancheTest.h"
-#include "DifferentialTest.h"
-
-#include <stdio.h>
-#include <time.h>
-
-//-----------------------------------------------------------------------------
-// Configuration. TODO - move these to command-line flags
-
-bool g_testAll = false;
-
-bool g_testSanity = false;
-bool g_testSpeed = false;
-bool g_testDiff = false;
-bool g_testDiffDist = false;
-bool g_testAvalanche = false;
-bool g_testBIC = false;
-bool g_testCyclic = false;
-bool g_testTwoBytes = false;
-bool g_testSparse = false;
-bool g_testPermutation = false;
-bool g_testWindow = false;
-bool g_testText = false;
-bool g_testZeroes = false;
-bool g_testSeed = false;
-
-//-----------------------------------------------------------------------------
-// This is the list of all hashes that SMHasher can test.
-
-struct HashInfo
-{
- pfHash hash;
- int hashbits;
- uint32_t verification;
- const char * name;
- const char * desc;
-};
-
-HashInfo g_hashes[] =
-{
- { DoNothingHash, 32, 0x00000000, "donothing32", "Do-Nothing function (only valid for measuring call overhead)" },
- { DoNothingHash, 64, 0x00000000, "donothing64", "Do-Nothing function (only valid for measuring call overhead)" },
- { DoNothingHash, 128, 0x00000000, "donothing128", "Do-Nothing function (only valid for measuring call overhead)" },
-
- { crc32, 32, 0x3719DB20, "crc32", "CRC-32" },
-
- { md5_32, 32, 0xC10C356B, "md5_32a", "MD5, first 32 bits of result" },
- { sha1_32a, 32, 0xF9376EA7, "sha1_32a", "SHA1, first 32 bits of result" },
-
- { FNV, 32, 0xE3CBBE91, "FNV", "Fowler-Noll-Vo hash, 32-bit" },
- { lookup3_test, 32, 0x3D83917A, "lookup3", "Bob Jenkins' lookup3" },
- { SuperFastHash, 32, 0x980ACD1D, "superfast", "Paul Hsieh's SuperFastHash" },
- { MurmurOAAT_test, 32, 0x5363BD98, "MurmurOAAT", "Murmur one-at-a-time" },
- { Crap8_test, 32, 0x743E97A1, "Crap8", "Crap8" },
-
- { CityHash64_test, 64, 0x45754A6F, "City64", "Google CityHash128WithSeed" },
- { CityHash128_test, 128, 0x94B0EF46, "City128", "Google CityHash128WithSeed" },
-
- // MurmurHash2
-
- { MurmurHash2_test, 32, 0x27864C1E, "Murmur2", "MurmurHash2 for x86, 32-bit" },
- { MurmurHash2A_test, 32, 0x7FBD4396, "Murmur2A", "MurmurHash2A for x86, 32-bit" },
- { MurmurHash64A_test, 64, 0x1F0D3804, "Murmur2B", "MurmurHash2 for x64, 64-bit" },
- { MurmurHash64B_test, 64, 0xDD537C05, "Murmur2C", "MurmurHash2 for x86, 64-bit" },
-
- // MurmurHash3
-
- { MurmurHash3_x86_32, 32, 0xB0F57EE3, "Murmur3A", "MurmurHash3 for x86, 32-bit" },
- { MurmurHash3_x86_128, 128, 0xB3ECE62A, "Murmur3C", "MurmurHash3 for x86, 128-bit" },
- { MurmurHash3_x64_128, 128, 0x6384BA69, "Murmur3F", "MurmurHash3 for x64, 128-bit" },
-
-};
-
-HashInfo * findHash ( const char * name )
-{
- for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
- {
- if(_stricmp(name,g_hashes[i].name) == 0) return &g_hashes[i];
- }
-
- return NULL;
-}
-
-//-----------------------------------------------------------------------------
-// Self-test on startup - verify that all installed hashes work correctly.
-
-void SelfTest ( void )
-{
- bool pass = true;
-
- for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
- {
- HashInfo * info = & g_hashes[i];
-
- pass &= VerificationTest(info->hash,info->hashbits,info->verification,false);
- }
-
- if(!pass)
- {
- printf("Self-test FAILED!\n");
-
- for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++)
- {
- HashInfo * info = & g_hashes[i];
-
- printf("%16s - ",info->name);
- pass &= VerificationTest(info->hash,info->hashbits,info->verification,true);
- }
-
- exit(1);
- }
-}
-
-//----------------------------------------------------------------------------
-
-template < typename hashtype >
-void test ( hashfunc<hashtype> hash, HashInfo * info )
-{
- const int hashbits = sizeof(hashtype) * 8;
-
- printf("-------------------------------------------------------------------------------\n");
- printf("--- Testing %s (%s)\n\n",info->name,info->desc);
-
- //-----------------------------------------------------------------------------
- // Sanity tests
-
- if(g_testSanity || g_testAll)
- {
- printf("[[[ Sanity Tests ]]]\n\n");
-
- VerificationTest(hash,hashbits,info->verification,true);
- SanityTest(hash,hashbits);
- AppendedZeroesTest(hash,hashbits);
- printf("\n");
- }
-
- //-----------------------------------------------------------------------------
- // Speed tests
-
- if(g_testSpeed || g_testAll)
- {
- printf("[[[ Speed Tests ]]]\n\n");
-
- BulkSpeedTest(info->hash,info->verification);
- printf("\n");
-
- for(int i = 1; i < 32; i++)
- {
- double cycles;
-
- TinySpeedTest(hashfunc<hashtype>(info->hash),sizeof(hashtype),i,info->verification,true,cycles);
- }
-
- printf("\n");
- }
-
- //-----------------------------------------------------------------------------
- // Differential tests
-
- if(g_testDiff || g_testAll)
- {
- printf("[[[ Differential Tests ]]]\n\n");
-
- bool result = true;
- bool dumpCollisions = false;
-
- result &= DiffTest< Blob<64>, hashtype >(hash,5,1000,dumpCollisions);
- result &= DiffTest< Blob<128>, hashtype >(hash,4,1000,dumpCollisions);
- result &= DiffTest< Blob<256>, hashtype >(hash,3,1000,dumpCollisions);
-
- if(!result) printf("*********FAIL*********\n");
- printf("\n");
- }
-
- //-----------------------------------------------------------------------------
- // Differential-distribution tests
-
- if(g_testDiffDist /*|| g_testAll*/)
- {
- printf("[[[ Differential Distribution Tests ]]]\n\n");
-
- bool result = true;
-
- result &= DiffDistTest2<uint64_t,hashtype>(hash);
-
- printf("\n");
- }
-
- //-----------------------------------------------------------------------------
- // Avalanche tests
-
- if(g_testAvalanche || g_testAll)
- {
- printf("[[[ Avalanche Tests ]]]\n\n");
-
- bool result = true;
-
- result &= AvalancheTest< Blob< 32>, hashtype > (hash,300000);
- result &= AvalancheTest< Blob< 40>, hashtype > (hash,300000);
- result &= AvalancheTest< Blob< 48>, hashtype > (hash,300000);
- result &= AvalancheTest< Blob< 56>, hashtype > (hash,300000);
-
- result &= AvalancheTest< Blob< 64>, hashtype > (hash,300000);
- result &= AvalancheTest< Blob< 72>, hashtype > (hash,300000);
- result &= AvalancheTest< Blob< 80>, hashtype > (hash,300000);
- result &= AvalancheTest< Blob< 88>, hashtype > (hash,300000);
-
- result &= AvalancheTest< Blob< 96>, hashtype > (hash,300000);
- result &= AvalancheTest< Blob<104>, hashtype > (hash,300000);
- result &= AvalancheTest< Blob<112>, hashtype > (hash,300000);
- result &= AvalancheTest< Blob<120>, hashtype > (hash,300000);
-
- result &= AvalancheTest< Blob<128>, hashtype > (hash,300000);
- result &= AvalancheTest< Blob<136>, hashtype > (hash,300000);
- result &= AvalancheTest< Blob<144>, hashtype > (hash,300000);
- result &= AvalancheTest< Blob<152>, hashtype > (hash,300000);
-
- if(!result) printf("*********FAIL*********\n");
- printf("\n");
- }
-
- //-----------------------------------------------------------------------------
- // Bit Independence Criteria. Interesting, but doesn't tell us much about
- // collision or distribution.
-
- if(g_testBIC)
- {
- printf("[[[ Bit Independence Criteria ]]]\n\n");
-
- bool result = true;
-
- //result &= BicTest<uint64_t,hashtype>(hash,2000000);
- BicTest3<Blob<88>,hashtype>(hash,2000000);
-
- if(!result) printf("*********FAIL*********\n");
- printf("\n");
- }
-
- //-----------------------------------------------------------------------------
- // Keyset 'Cyclic' - keys of the form "abcdabcdabcd..."
-
- if(g_testCyclic || g_testAll)
- {
- printf("[[[ Keyset 'Cyclic' Tests ]]]\n\n");
-
- bool result = true;
- bool drawDiagram = false;
-
- result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+0,8,10000000,drawDiagram);
- result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+1,8,10000000,drawDiagram);
- result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+2,8,10000000,drawDiagram);
- result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+3,8,10000000,drawDiagram);
- result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+4,8,10000000,drawDiagram);
-
- if(!result) printf("*********FAIL*********\n");
- printf("\n");
- }
-
- //-----------------------------------------------------------------------------
- // Keyset 'TwoBytes' - all keys up to N bytes containing two non-zero bytes
-
- // This generates some huge keysets, 128-bit tests will take ~1.3 gigs of RAM.
-
- if(g_testTwoBytes || g_testAll)
- {
- printf("[[[ Keyset 'TwoBytes' Tests ]]]\n\n");
-
- bool result = true;
- bool drawDiagram = false;
-
- for(int i = 4; i <= 20; i += 4)
- {
- result &= TwoBytesTest2<hashtype>(hash,i,drawDiagram);
- }
-
- if(!result) printf("*********FAIL*********\n");
- printf("\n");
- }
-
- //-----------------------------------------------------------------------------
- // Keyset 'Sparse' - keys with all bits 0 except a few
-
- if(g_testSparse || g_testAll)
- {
- printf("[[[ Keyset 'Sparse' Tests ]]]\n\n");
-
- bool result = true;
- bool drawDiagram = false;
-
- result &= SparseKeyTest< 32,hashtype>(hash,6,true,true,true,drawDiagram);
- result &= SparseKeyTest< 40,hashtype>(hash,6,true,true,true,drawDiagram);
- result &= SparseKeyTest< 48,hashtype>(hash,5,true,true,true,drawDiagram);
- result &= SparseKeyTest< 56,hashtype>(hash,5,true,true,true,drawDiagram);
- result &= SparseKeyTest< 64,hashtype>(hash,5,true,true,true,drawDiagram);
- result &= SparseKeyTest< 96,hashtype>(hash,4,true,true,true,drawDiagram);
- result &= SparseKeyTest< 256,hashtype>(hash,3,true,true,true,drawDiagram);
- result &= SparseKeyTest<2048,hashtype>(hash,2,true,true,true,drawDiagram);
-
- if(!result) printf("*********FAIL*********\n");
- printf("\n");
- }
-
- //-----------------------------------------------------------------------------
- // Keyset 'Permutation' - all possible combinations of a set of blocks
-
- if(g_testPermutation || g_testAll)
- {
- {
- // This one breaks lookup3, surprisingly
-
- printf("[[[ Keyset 'Combination Lowbits' Tests ]]]\n\n");
-
- bool result = true;
- bool drawDiagram = false;
-
- uint32_t blocks[] =
- {
- 0x00000000,
-
- 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
- };
-
- result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
-
- if(!result) printf("*********FAIL*********\n");
- printf("\n");
- }
-
- {
- printf("[[[ Keyset 'Combination Highbits' Tests ]]]\n\n");
-
- bool result = true;
- bool drawDiagram = false;
-
- uint32_t blocks[] =
- {
- 0x00000000,
-
- 0x20000000, 0x40000000, 0x60000000, 0x80000000, 0xA0000000, 0xC0000000, 0xE0000000
- };
-
- result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
-
- if(!result) printf("*********FAIL*********\n");
- printf("\n");
- }
-
- {
- printf("[[[ Keyset 'Combination 0x8000000' Tests ]]]\n\n");
-
- bool result = true;
- bool drawDiagram = false;
-
- uint32_t blocks[] =
- {
- 0x00000000,
-
- 0x80000000,
- };
-
- result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
-
- if(!result) printf("*********FAIL*********\n");
- printf("\n");
- }
-
- {
- printf("[[[ Keyset 'Combination 0x0000001' Tests ]]]\n\n");
-
- bool result = true;
- bool drawDiagram = false;
-
- uint32_t blocks[] =
- {
- 0x00000000,
-
- 0x00000001,
- };
-
- result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
-
- if(!result) printf("*********FAIL*********\n");
- printf("\n");
- }
-
- {
- printf("[[[ Keyset 'Combination Hi-Lo' Tests ]]]\n\n");
-
- bool result = true;
- bool drawDiagram = false;
-
- uint32_t blocks[] =
- {
- 0x00000000,
-
- 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
-
- 0x80000000, 0x40000000, 0xC0000000, 0x20000000, 0xA0000000, 0x60000000, 0xE0000000
- };
-
- result &= CombinationKeyTest<hashtype>(hash,6,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram);
-
- if(!result) printf("*********FAIL*********\n");
- printf("\n");
- }
- }
-
- //-----------------------------------------------------------------------------
- // Keyset 'Window'
-
- // Skip distribution test for these - they're too easy to distribute well,
- // and it generates a _lot_ of testing
-
- if(g_testWindow || g_testAll)
- {
- printf("[[[ Keyset 'Window' Tests ]]]\n\n");
-
- bool result = true;
- bool testCollision = true;
- bool testDistribution = false;
- bool drawDiagram = false;
-
- result &= WindowedKeyTest< Blob<hashbits*2>, hashtype > ( hash, 20, testCollision, testDistribution, drawDiagram );
-
- if(!result) printf("*********FAIL*********\n");
- printf("\n");
- }
-
- //-----------------------------------------------------------------------------
- // Keyset 'Text'
-
- if(g_testText || g_testAll)
- {
- printf("[[[ Keyset 'Text' Tests ]]]\n\n");
-
- bool result = true;
- bool drawDiagram = false;
-
- const char * alnum = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
-
- result &= TextKeyTest( hash, "Foo", alnum,4, "Bar", drawDiagram );
- result &= TextKeyTest( hash, "FooBar", alnum,4, "", drawDiagram );
- result &= TextKeyTest( hash, "", alnum,4, "FooBar", drawDiagram );
-
- if(!result) printf("*********FAIL*********\n");
- printf("\n");
- }
-
- //-----------------------------------------------------------------------------
- // Keyset 'Zeroes'
-
- if(g_testZeroes || g_testAll)
- {
- printf("[[[ Keyset 'Zeroes' Tests ]]]\n\n");
-
- bool result = true;
- bool drawDiagram = false;
-
- result &= ZeroKeyTest<hashtype>( hash, drawDiagram );
-
- if(!result) printf("*********FAIL*********\n");
- printf("\n");
- }
-
- //-----------------------------------------------------------------------------
- // Keyset 'Seed'
-
- if(g_testSeed || g_testAll)
- {
- printf("[[[ Keyset 'Seed' Tests ]]]\n\n");
-
- bool result = true;
- bool drawDiagram = false;
-
- result &= SeedTest<hashtype>( hash, 1000000, drawDiagram );
-
- if(!result) printf("*********FAIL*********\n");
- printf("\n");
- }
-}
-
-//-----------------------------------------------------------------------------
-
-uint32_t g_inputVCode = 1;
-uint32_t g_outputVCode = 1;
-uint32_t g_resultVCode = 1;
-
-HashInfo * g_hashUnderTest = NULL;
-
-void VerifyHash ( const void * key, int len, uint32_t seed, void * out )
-{
- g_inputVCode = MurmurOAAT(key,len,g_inputVCode);
- g_inputVCode = MurmurOAAT(&seed,sizeof(uint32_t),g_inputVCode);
-
- g_hashUnderTest->hash(key,len,seed,out);
-
- g_outputVCode = MurmurOAAT(out,g_hashUnderTest->hashbits/8,g_outputVCode);
-}
-
-//-----------------------------------------------------------------------------
-
-void testHash ( const char * name )
-{
- HashInfo * pInfo = findHash(name);
-
- if(pInfo == NULL)
- {
- printf("Invalid hash '%s' specified\n",name);
- return;
- }
- else
- {
- g_hashUnderTest = pInfo;
-
- if(pInfo->hashbits == 32)
- {
- test<uint32_t>( VerifyHash, pInfo );
- }
- else if(pInfo->hashbits == 64)
- {
- test<uint64_t>( pInfo->hash, pInfo );
- }
- else if(pInfo->hashbits == 128)
- {
- test<uint128_t>( pInfo->hash, pInfo );
- }
- else if(pInfo->hashbits == 256)
- {
- test<uint256_t>( pInfo->hash, pInfo );
- }
- else
- {
- printf("Invalid hash bit width %d for hash '%s'",pInfo->hashbits,pInfo->name);
- }
- }
-}
-//-----------------------------------------------------------------------------
-
-int main ( int argc, char ** argv )
-{
- const char * hashToTest = "murmur3a";
-
- if(argc < 2)
- {
- printf("(No test hash given on command line, testing Murmur3_x86_32.)\n");
- }
- else
- {
- hashToTest = argv[1];
- }
-
- // Code runs on the 3rd CPU by default
-
- SetAffinity((1 << 2));
-
- SelfTest();
-
- int timeBegin = clock();
-
- g_testAll = true;
-
- //g_testSanity = true;
- //g_testSpeed = true;
- //g_testAvalanche = true;
- //g_testBIC = true;
- //g_testCyclic = true;
- //g_testTwoBytes = true;
- //g_testDiff = true;
- //g_testDiffDist = true;
- //g_testSparse = true;
- //g_testPermutation = true;
- //g_testWindow = true;
- //g_testZeroes = true;
-
- testHash(hashToTest);
-
- //----------
-
- int timeEnd = clock();
-
- printf("\n");
- printf("Input vcode 0x%08x, Output vcode 0x%08x, Result vcode 0x%08x\n",g_inputVCode,g_outputVCode,g_resultVCode);
- printf("Verification value is 0x%08x - Testing took %f seconds\n",g_verify,double(timeEnd-timeBegin)/double(CLOCKS_PER_SEC));
- printf("-------------------------------------------------------------------------------\n");
- return 0;
-}
+#include "Platform.h" +#include "Hashes.h" +#include "KeysetTest.h" +#include "SpeedTest.h" +#include "AvalancheTest.h" +#include "DifferentialTest.h" + +#include <stdio.h> +#include <time.h> + +//----------------------------------------------------------------------------- +// Configuration. TODO - move these to command-line flags + +bool g_testAll = false; + +bool g_testSanity = false; +bool g_testSpeed = false; +bool g_testDiff = false; +bool g_testDiffDist = false; +bool g_testAvalanche = false; +bool g_testBIC = false; +bool g_testCyclic = false; +bool g_testTwoBytes = false; +bool g_testSparse = false; +bool g_testPermutation = false; +bool g_testWindow = false; +bool g_testText = false; +bool g_testZeroes = false; +bool g_testSeed = false; + +//----------------------------------------------------------------------------- +// This is the list of all hashes that SMHasher can test. + +struct HashInfo +{ + pfHash hash; + int hashbits; + uint32_t verification; + const char * name; + const char * desc; +}; + +HashInfo g_hashes[] = +{ + { DoNothingHash, 32, 0x00000000, "donothing32", "Do-Nothing function (only valid for measuring call overhead)" }, + { DoNothingHash, 64, 0x00000000, "donothing64", "Do-Nothing function (only valid for measuring call overhead)" }, + { DoNothingHash, 128, 0x00000000, "donothing128", "Do-Nothing function (only valid for measuring call overhead)" }, + + { crc32, 32, 0x3719DB20, "crc32", "CRC-32" }, + + { md5_32, 32, 0xC10C356B, "md5_32a", "MD5, first 32 bits of result" }, + { sha1_32a, 32, 0xF9376EA7, "sha1_32a", "SHA1, first 32 bits of result" }, + + { FNV, 32, 0xE3CBBE91, "FNV", "Fowler-Noll-Vo hash, 32-bit" }, + { Bernstein, 32, 0xBDB4B640, "bernstein", "Bernstein, 32-bit" }, + { lookup3_test, 32, 0x3D83917A, "lookup3", "Bob Jenkins' lookup3" }, + { SuperFastHash, 32, 0x980ACD1D, "superfast", "Paul Hsieh's SuperFastHash" }, + { MurmurOAAT_test, 32, 0x5363BD98, "MurmurOAAT", "Murmur one-at-a-time" }, + { Crap8_test, 32, 0x743E97A1, 
"Crap8", "Crap8" }, + + { CityHash64_test, 64, 0x25A20825, "City64", "Google CityHash64WithSeed" }, + { CityHash128_test, 128, 0x6531F54E, "City128", "Google CityHash128WithSeed" }, + + { SpookyHash64_test, 32, 0x3F798BBB, "Spooky32", "Bob Jenkins' SpookyHash, 32-bit result" }, + { SpookyHash64_test, 64, 0xA7F955F1, "Spooky64", "Bob Jenkins' SpookyHash, 64-bit result" }, + { SpookyHash128_test, 128, 0x8D263080, "Spooky128", "Bob Jenkins' SpookyHash, 128-bit result" }, + + // MurmurHash2 + + { MurmurHash2_test, 32, 0x27864C1E, "Murmur2", "MurmurHash2 for x86, 32-bit" }, + { MurmurHash2A_test, 32, 0x7FBD4396, "Murmur2A", "MurmurHash2A for x86, 32-bit" }, + { MurmurHash64A_test, 64, 0x1F0D3804, "Murmur2B", "MurmurHash2 for x64, 64-bit" }, + { MurmurHash64B_test, 64, 0xDD537C05, "Murmur2C", "MurmurHash2 for x86, 64-bit" }, + + // MurmurHash3 + + { MurmurHash3_x86_32, 32, 0xB0F57EE3, "Murmur3A", "MurmurHash3 for x86, 32-bit" }, + { MurmurHash3_x86_128, 128, 0xB3ECE62A, "Murmur3C", "MurmurHash3 for x86, 128-bit" }, + { MurmurHash3_x64_128, 128, 0x6384BA69, "Murmur3F", "MurmurHash3 for x64, 128-bit" }, + +}; + +HashInfo * findHash ( const char * name ) +{ + for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++) + { + if(_stricmp(name,g_hashes[i].name) == 0) return &g_hashes[i]; + } + + return NULL; +} + +//----------------------------------------------------------------------------- +// Self-test on startup - verify that all installed hashes work correctly. 
+ +void SelfTest ( void ) +{ + bool pass = true; + + for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++) + { + HashInfo * info = & g_hashes[i]; + + pass &= VerificationTest(info->hash,info->hashbits,info->verification,false); + } + + if(!pass) + { + printf("Self-test FAILED!\n"); + + for(size_t i = 0; i < sizeof(g_hashes) / sizeof(HashInfo); i++) + { + HashInfo * info = & g_hashes[i]; + + printf("%16s - ",info->name); + pass &= VerificationTest(info->hash,info->hashbits,info->verification,true); + } + + exit(1); + } +} + +//---------------------------------------------------------------------------- + +template < typename hashtype > +void test ( hashfunc<hashtype> hash, HashInfo * info ) +{ + const int hashbits = sizeof(hashtype) * 8; + + printf("-------------------------------------------------------------------------------\n"); + printf("--- Testing %s (%s)\n\n",info->name,info->desc); + + //----------------------------------------------------------------------------- + // Sanity tests + + if(g_testSanity || g_testAll) + { + printf("[[[ Sanity Tests ]]]\n\n"); + + VerificationTest(hash,hashbits,info->verification,true); + SanityTest(hash,hashbits); + AppendedZeroesTest(hash,hashbits); + printf("\n"); + } + + //----------------------------------------------------------------------------- + // Speed tests + + if(g_testSpeed || g_testAll) + { + printf("[[[ Speed Tests ]]]\n\n"); + + BulkSpeedTest(info->hash,info->verification); + printf("\n"); + + for(int i = 1; i < 32; i++) + { + double cycles; + + TinySpeedTest(hashfunc<hashtype>(info->hash),sizeof(hashtype),i,info->verification,true,cycles); + } + + printf("\n"); + } + + //----------------------------------------------------------------------------- + // Differential tests + + if(g_testDiff || g_testAll) + { + printf("[[[ Differential Tests ]]]\n\n"); + + bool result = true; + bool dumpCollisions = false; + + result &= DiffTest< Blob<64>, hashtype >(hash,5,1000,dumpCollisions); + result &= DiffTest< 
Blob<128>, hashtype >(hash,4,1000,dumpCollisions); + result &= DiffTest< Blob<256>, hashtype >(hash,3,1000,dumpCollisions); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + + //----------------------------------------------------------------------------- + // Differential-distribution tests + + if(g_testDiffDist /*|| g_testAll*/) + { + printf("[[[ Differential Distribution Tests ]]]\n\n"); + + bool result = true; + + result &= DiffDistTest2<uint64_t,hashtype>(hash); + + printf("\n"); + } + + //----------------------------------------------------------------------------- + // Avalanche tests + + if(g_testAvalanche || g_testAll) + { + printf("[[[ Avalanche Tests ]]]\n\n"); + + bool result = true; + + result &= AvalancheTest< Blob< 32>, hashtype > (hash,300000); + result &= AvalancheTest< Blob< 40>, hashtype > (hash,300000); + result &= AvalancheTest< Blob< 48>, hashtype > (hash,300000); + result &= AvalancheTest< Blob< 56>, hashtype > (hash,300000); + + result &= AvalancheTest< Blob< 64>, hashtype > (hash,300000); + result &= AvalancheTest< Blob< 72>, hashtype > (hash,300000); + result &= AvalancheTest< Blob< 80>, hashtype > (hash,300000); + result &= AvalancheTest< Blob< 88>, hashtype > (hash,300000); + + result &= AvalancheTest< Blob< 96>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<104>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<112>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<120>, hashtype > (hash,300000); + + result &= AvalancheTest< Blob<128>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<136>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<144>, hashtype > (hash,300000); + result &= AvalancheTest< Blob<152>, hashtype > (hash,300000); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + + //----------------------------------------------------------------------------- + // Bit Independence Criteria. 
Interesting, but doesn't tell us much about + // collision or distribution. + + if(g_testBIC) + { + printf("[[[ Bit Independence Criteria ]]]\n\n"); + + bool result = true; + + //result &= BicTest<uint64_t,hashtype>(hash,2000000); + BicTest3<Blob<88>,hashtype>(hash,2000000); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + + //----------------------------------------------------------------------------- + // Keyset 'Cyclic' - keys of the form "abcdabcdabcd..." + + if(g_testCyclic || g_testAll) + { + printf("[[[ Keyset 'Cyclic' Tests ]]]\n\n"); + + bool result = true; + bool drawDiagram = false; + + result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+0,8,10000000,drawDiagram); + result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+1,8,10000000,drawDiagram); + result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+2,8,10000000,drawDiagram); + result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+3,8,10000000,drawDiagram); + result &= CyclicKeyTest<hashtype>(hash,sizeof(hashtype)+4,8,10000000,drawDiagram); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + + //----------------------------------------------------------------------------- + // Keyset 'TwoBytes' - all keys up to N bytes containing two non-zero bytes + + // This generates some huge keysets, 128-bit tests will take ~1.3 gigs of RAM. 
+ + if(g_testTwoBytes || g_testAll) + { + printf("[[[ Keyset 'TwoBytes' Tests ]]]\n\n"); + + bool result = true; + bool drawDiagram = false; + + for(int i = 4; i <= 20; i += 4) + { + result &= TwoBytesTest2<hashtype>(hash,i,drawDiagram); + } + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + + //----------------------------------------------------------------------------- + // Keyset 'Sparse' - keys with all bits 0 except a few + + if(g_testSparse || g_testAll) + { + printf("[[[ Keyset 'Sparse' Tests ]]]\n\n"); + + bool result = true; + bool drawDiagram = false; + + result &= SparseKeyTest< 32,hashtype>(hash,6,true,true,true,drawDiagram); + result &= SparseKeyTest< 40,hashtype>(hash,6,true,true,true,drawDiagram); + result &= SparseKeyTest< 48,hashtype>(hash,5,true,true,true,drawDiagram); + result &= SparseKeyTest< 56,hashtype>(hash,5,true,true,true,drawDiagram); + result &= SparseKeyTest< 64,hashtype>(hash,5,true,true,true,drawDiagram); + result &= SparseKeyTest< 96,hashtype>(hash,4,true,true,true,drawDiagram); + result &= SparseKeyTest< 256,hashtype>(hash,3,true,true,true,drawDiagram); + result &= SparseKeyTest<2048,hashtype>(hash,2,true,true,true,drawDiagram); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + + //----------------------------------------------------------------------------- + // Keyset 'Permutation' - all possible combinations of a set of blocks + + if(g_testPermutation || g_testAll) + { + { + // This one breaks lookup3, surprisingly + + printf("[[[ Keyset 'Combination Lowbits' Tests ]]]\n\n"); + + bool result = true; + bool drawDiagram = false; + + uint32_t blocks[] = + { + 0x00000000, + + 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007, + }; + + result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + + { + printf("[[[ Keyset 'Combination 
Highbits' Tests ]]]\n\n"); + + bool result = true; + bool drawDiagram = false; + + uint32_t blocks[] = + { + 0x00000000, + + 0x20000000, 0x40000000, 0x60000000, 0x80000000, 0xA0000000, 0xC0000000, 0xE0000000 + }; + + result &= CombinationKeyTest<hashtype>(hash,8,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + + { + printf("[[[ Keyset 'Combination 0x8000000' Tests ]]]\n\n"); + + bool result = true; + bool drawDiagram = false; + + uint32_t blocks[] = + { + 0x00000000, + + 0x80000000, + }; + + result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + + { + printf("[[[ Keyset 'Combination 0x0000001' Tests ]]]\n\n"); + + bool result = true; + bool drawDiagram = false; + + uint32_t blocks[] = + { + 0x00000000, + + 0x00000001, + }; + + result &= CombinationKeyTest<hashtype>(hash,20,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + + { + printf("[[[ Keyset 'Combination Hi-Lo' Tests ]]]\n\n"); + + bool result = true; + bool drawDiagram = false; + + uint32_t blocks[] = + { + 0x00000000, + + 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007, + + 0x80000000, 0x40000000, 0xC0000000, 0x20000000, 0xA0000000, 0x60000000, 0xE0000000 + }; + + result &= CombinationKeyTest<hashtype>(hash,6,blocks,sizeof(blocks) / sizeof(uint32_t),true,true,drawDiagram); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + } + + //----------------------------------------------------------------------------- + // Keyset 'Window' + + // Skip distribution test for these - they're too easy to distribute well, + // and it generates a _lot_ of testing + + if(g_testWindow || g_testAll) + { + printf("[[[ Keyset 'Window' Tests ]]]\n\n"); + + bool result = 
true; + bool testCollision = true; + bool testDistribution = false; + bool drawDiagram = false; + + result &= WindowedKeyTest< Blob<hashbits*2>, hashtype > ( hash, 20, testCollision, testDistribution, drawDiagram ); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + + //----------------------------------------------------------------------------- + // Keyset 'Text' + + if(g_testText || g_testAll) + { + printf("[[[ Keyset 'Text' Tests ]]]\n\n"); + + bool result = true; + bool drawDiagram = false; + + const char * alnum = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; + + result &= TextKeyTest( hash, "Foo", alnum,4, "Bar", drawDiagram ); + result &= TextKeyTest( hash, "FooBar", alnum,4, "", drawDiagram ); + result &= TextKeyTest( hash, "", alnum,4, "FooBar", drawDiagram ); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + + //----------------------------------------------------------------------------- + // Keyset 'Zeroes' + + if(g_testZeroes || g_testAll) + { + printf("[[[ Keyset 'Zeroes' Tests ]]]\n\n"); + + bool result = true; + bool drawDiagram = false; + + result &= ZeroKeyTest<hashtype>( hash, drawDiagram ); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } + + //----------------------------------------------------------------------------- + // Keyset 'Seed' + + if(g_testSeed || g_testAll) + { + printf("[[[ Keyset 'Seed' Tests ]]]\n\n"); + + bool result = true; + bool drawDiagram = false; + + result &= SeedTest<hashtype>( hash, 1000000, drawDiagram ); + + if(!result) printf("*********FAIL*********\n"); + printf("\n"); + } +} + +//----------------------------------------------------------------------------- + +uint32_t g_inputVCode = 1; +uint32_t g_outputVCode = 1; +uint32_t g_resultVCode = 1; + +HashInfo * g_hashUnderTest = NULL; + +void VerifyHash ( const void * key, int len, uint32_t seed, void * out ) +{ + g_inputVCode = MurmurOAAT(key,len,g_inputVCode); + g_inputVCode = 
MurmurOAAT(&seed,sizeof(uint32_t),g_inputVCode); + + g_hashUnderTest->hash(key,len,seed,out); + + g_outputVCode = MurmurOAAT(out,g_hashUnderTest->hashbits/8,g_outputVCode); +} + +//----------------------------------------------------------------------------- + +void testHash ( const char * name ) +{ + HashInfo * pInfo = findHash(name); + + if(pInfo == NULL) + { + printf("Invalid hash '%s' specified\n",name); + return; + } + else + { + g_hashUnderTest = pInfo; + + if(pInfo->hashbits == 32) + { + test<uint32_t>( VerifyHash, pInfo ); + } + else if(pInfo->hashbits == 64) + { + test<uint64_t>( pInfo->hash, pInfo ); + } + else if(pInfo->hashbits == 128) + { + test<uint128_t>( pInfo->hash, pInfo ); + } + else if(pInfo->hashbits == 256) + { + test<uint256_t>( pInfo->hash, pInfo ); + } + else + { + printf("Invalid hash bit width %d for hash '%s'",pInfo->hashbits,pInfo->name); + } + } +} +//----------------------------------------------------------------------------- + +int main ( int argc, char ** argv ) +{ + const char * hashToTest = "murmur3a"; + + if(argc < 2) + { + printf("(No test hash given on command line, testing Murmur3_x86_32.)\n"); + } + else + { + hashToTest = argv[1]; + } + + // Code runs on the 3rd CPU by default + + SetAffinity((1 << 2)); + + SelfTest(); + + int timeBegin = clock(); + + g_testAll = true; + + //g_testSanity = true; + //g_testSpeed = true; + //g_testAvalanche = true; + //g_testBIC = true; + //g_testCyclic = true; + //g_testTwoBytes = true; + //g_testDiff = true; + //g_testDiffDist = true; + //g_testSparse = true; + //g_testPermutation = true; + //g_testWindow = true; + //g_testZeroes = true; + + testHash(hashToTest); + + //---------- + + int timeEnd = clock(); + + printf("\n"); + printf("Input vcode 0x%08x, Output vcode 0x%08x, Result vcode 0x%08x\n",g_inputVCode,g_outputVCode,g_resultVCode); + printf("Verification value is 0x%08x - Testing took %f seconds\n",g_verify,double(timeEnd-timeBegin)/double(CLOCKS_PER_SEC)); + 
printf("-------------------------------------------------------------------------------\n"); + return 0; +} @@ -1,382 +1,382 @@ -#include <memory.h>
-#include "Types.h"
-
-// "Derived from the RSA Data Security, Inc. MD5 Message Digest Algorithm"
-
-/**
- * \brief MD5 context structure
- *
- * Running state shared by md5_starts(), md5_update() and md5_finish().
- * md5_update() keeps total[0] masked to 32 bits and increments total[1]
- * whenever total[0] wraps, so the two words act as one byte counter.
- */
-typedef struct
-{
- unsigned long total[2]; /*!< number of bytes processed: [0] low 32 bits, [1] wrap count */
- unsigned long state[4]; /*!< intermediate digest state */
- unsigned char buffer[64]; /*!< data block being processed; holds a partial block between md5_update() calls */
-
- unsigned char ipad[64]; /*!< HMAC: inner padding */
- unsigned char opad[64]; /*!< HMAC: outer padding */
-}
-md5_context;
-
-/**
- * \brief MD5 context setup
- *
- * Resets the byte counter to zero and loads the four starting state
- * words. md5() calls it before the first md5_update() on a context.
- *
- * \param ctx context to be initialized
- */
-void md5_starts( md5_context *ctx );
-
-/**
- * \brief MD5 process buffer
- *
- * Bytes that do not complete a 64-byte block are kept in ctx->buffer
- * until a later call supplies the rest. A call with ilen <= 0 returns
- * without changing the context.
- *
- * \param ctx MD5 context
- * \param input buffer holding the data
- * \param ilen length of the input data, in bytes
- */
-void md5_update( md5_context *ctx, unsigned char *input, int ilen );
-
-/**
- * \brief MD5 final digest
- *
- * Feeds the padding and the message length through md5_update(), then
- * writes the four state words to output, least significant byte first.
- * The context is modified by this call and is not reset.
- *
- * \param ctx MD5 context
- * \param output MD5 checksum result (16 bytes are written)
- */
-void md5_finish( md5_context *ctx, unsigned char output[16] );
-
-/**
- * \brief Output = MD5( input buffer )
- *
- * One-shot wrapper: runs md5_starts(), md5_update() and md5_finish() on
- * a local context, then overwrites that context with zeros.
- *
- * \param input buffer holding the data
- * \param ilen length of the input data, in bytes
- * \param output MD5 checksum result (16 bytes are written)
- */
-void md5( unsigned char *input, int ilen, unsigned char output[16] );
-
-/**
- * \brief Output = MD5( file contents )
- *
- * NOTE(review): this file only declares the function; no definition
- * appears in it. Confirm it is provided elsewhere or drop the prototype.
- *
- * \param path input file name
- * \param output MD5 checksum result
- *
- * \return 0 if successful, 1 if fopen failed,
- * or 2 if fread failed
- */
-int md5_file( char *path, unsigned char output[16] );
-
-/**
- * \brief MD5 HMAC context setup
- *
- * NOTE(review): this file only declares the function; no definition
- * appears in it. Confirm it is provided elsewhere or drop the prototype.
- *
- * \param ctx HMAC context to be initialized
- * \param key HMAC secret key
- * \param keylen length of the HMAC key
- */
-void md5_hmac_starts( md5_context *ctx, unsigned char *key, int keylen );
-
-/**
- * \brief MD5 HMAC process buffer
- *
- * NOTE(review): this file only declares the function; no definition
- * appears in it. Confirm it is provided elsewhere or drop the prototype.
- *
- * \param ctx HMAC context
- * \param input buffer holding the data
- * \param ilen length of the input data
- */
-void md5_hmac_update( md5_context *ctx, unsigned char *input, int ilen );
-
-/**
- * \brief MD5 HMAC final digest
- *
- * NOTE(review): this file only declares the function; no definition
- * appears in it. Confirm it is provided elsewhere or drop the prototype.
- *
- * \param ctx HMAC context
- * \param output MD5 HMAC checksum result
- */
-void md5_hmac_finish( md5_context *ctx, unsigned char output[16] );
-
-/**
- * \brief Output = HMAC-MD5( hmac key, input buffer )
- *
- * NOTE(review): this file only declares the function; no definition
- * appears in it. Confirm it is provided elsewhere or drop the prototype.
- *
- * \param key HMAC secret key
- * \param keylen length of the HMAC key
- * \param input buffer holding the data
- * \param ilen length of the input data
- * \param output HMAC-MD5 result
- */
-void md5_hmac( unsigned char *key, int keylen,
- unsigned char *input, int ilen,
- unsigned char output[16] );
-
-/**
- * \brief Checkup routine
- *
- * NOTE(review): this file only declares the function; no definition
- * appears in it, so the meaning of `verbose` cannot be checked here
- * (presumably it enables progress output - confirm).
- *
- * \return 0 if successful, or 1 if the test failed
- */
-int md5_self_test( int verbose );
-
-/*
- * 32-bit integer manipulation macros (little endian)
- *
- * GET_ULONG_LE(n,b,i) assembles n from the bytes b[i]..b[i+3], with
- * b[i] as the least significant byte. PUT_ULONG_LE(n,b,i) stores the
- * low four bytes of n into b[i]..b[i+3] in the same order. Each macro
- * sits behind #ifndef, so a definition made earlier takes precedence.
- */
-#ifndef GET_ULONG_LE
-#define GET_ULONG_LE(n,b,i) \
-{ \
- (n) = ( (unsigned long) (b)[(i) ] ) \
- | ( (unsigned long) (b)[(i) + 1] << 8 ) \
- | ( (unsigned long) (b)[(i) + 2] << 16 ) \
- | ( (unsigned long) (b)[(i) + 3] << 24 ); \
-}
-#endif
-
-#ifndef PUT_ULONG_LE
-#define PUT_ULONG_LE(n,b,i) \
-{ \
- (b)[(i) ] = (unsigned char) ( (n) ); \
- (b)[(i) + 1] = (unsigned char) ( (n) >> 8 ); \
- (b)[(i) + 2] = (unsigned char) ( (n) >> 16 ); \
- (b)[(i) + 3] = (unsigned char) ( (n) >> 24 ); \
-}
-#endif
-
-/*
- * MD5 context setup
- *
- * Clears both words of the byte counter and seeds state[0..3] with the
- * algorithm's fixed starting values. buffer, ipad and opad are left
- * as they were.
- */
-void md5_starts( md5_context *ctx )
-{
- ctx->total[0] = 0;
- ctx->total[1] = 0;
-
- ctx->state[0] = 0x67452301;
- ctx->state[1] = 0xEFCDAB89;
- ctx->state[2] = 0x98BADCFE;
- ctx->state[3] = 0x10325476;
-}
-
-static void md5_process( md5_context *ctx, unsigned char data[64] )
-{ /* Folds one 64-byte block `data` into ctx->state: four rounds of sixteen P() steps. */
- unsigned long X[16], A, B, C, D;
- /* Decode the block into sixteen words X[0..15], least significant byte first. */
- GET_ULONG_LE( X[ 0], data, 0 );
- GET_ULONG_LE( X[ 1], data, 4 );
- GET_ULONG_LE( X[ 2], data, 8 );
- GET_ULONG_LE( X[ 3], data, 12 );
- GET_ULONG_LE( X[ 4], data, 16 );
- GET_ULONG_LE( X[ 5], data, 20 );
- GET_ULONG_LE( X[ 6], data, 24 );
- GET_ULONG_LE( X[ 7], data, 28 );
- GET_ULONG_LE( X[ 8], data, 32 );
- GET_ULONG_LE( X[ 9], data, 36 );
- GET_ULONG_LE( X[10], data, 40 );
- GET_ULONG_LE( X[11], data, 44 );
- GET_ULONG_LE( X[12], data, 48 );
- GET_ULONG_LE( X[13], data, 52 );
- GET_ULONG_LE( X[14], data, 56 );
- GET_ULONG_LE( X[15], data, 60 );
- /* S(x,n): rotate x left by n within 32 bits. The mask keeps bits above 31 out of the
- * wrapped-around part when unsigned long is wider than 32 bits; the left-shifted
- * part is not masked. */
-#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n)))
- /* P: one step, a = b + S(a + F(b,c,d) + X[k] + t, s). F is redefined before each round. */
-#define P(a,b,c,d,k,s,t) \
-{ \
- a += F(b,c,d) + X[k] + t; a = S(a,s) + b; \
-}
- /* Work on a copy of the state; the copy is added back into ctx->state at the end. */
- A = ctx->state[0];
- B = ctx->state[1];
- C = ctx->state[2];
- D = ctx->state[3];
- /* Round 1 */
-#define F(x,y,z) (z ^ (x & (y ^ z)))
-
- P( A, B, C, D, 0, 7, 0xD76AA478 );
- P( D, A, B, C, 1, 12, 0xE8C7B756 );
- P( C, D, A, B, 2, 17, 0x242070DB );
- P( B, C, D, A, 3, 22, 0xC1BDCEEE );
- P( A, B, C, D, 4, 7, 0xF57C0FAF );
- P( D, A, B, C, 5, 12, 0x4787C62A );
- P( C, D, A, B, 6, 17, 0xA8304613 );
- P( B, C, D, A, 7, 22, 0xFD469501 );
- P( A, B, C, D, 8, 7, 0x698098D8 );
- P( D, A, B, C, 9, 12, 0x8B44F7AF );
- P( C, D, A, B, 10, 17, 0xFFFF5BB1 );
- P( B, C, D, A, 11, 22, 0x895CD7BE );
- P( A, B, C, D, 12, 7, 0x6B901122 );
- P( D, A, B, C, 13, 12, 0xFD987193 );
- P( C, D, A, B, 14, 17, 0xA679438E );
- P( B, C, D, A, 15, 22, 0x49B40821 );
-
-#undef F
- /* Round 2 */
-#define F(x,y,z) (y ^ (z & (x ^ y)))
-
- P( A, B, C, D, 1, 5, 0xF61E2562 );
- P( D, A, B, C, 6, 9, 0xC040B340 );
- P( C, D, A, B, 11, 14, 0x265E5A51 );
- P( B, C, D, A, 0, 20, 0xE9B6C7AA );
- P( A, B, C, D, 5, 5, 0xD62F105D );
- P( D, A, B, C, 10, 9, 0x02441453 );
- P( C, D, A, B, 15, 14, 0xD8A1E681 );
- P( B, C, D, A, 4, 20, 0xE7D3FBC8 );
- P( A, B, C, D, 9, 5, 0x21E1CDE6 );
- P( D, A, B, C, 14, 9, 0xC33707D6 );
- P( C, D, A, B, 3, 14, 0xF4D50D87 );
- P( B, C, D, A, 8, 20, 0x455A14ED );
- P( A, B, C, D, 13, 5, 0xA9E3E905 );
- P( D, A, B, C, 2, 9, 0xFCEFA3F8 );
- P( C, D, A, B, 7, 14, 0x676F02D9 );
- P( B, C, D, A, 12, 20, 0x8D2A4C8A );
-
-#undef F
- /* Round 3 */
-#define F(x,y,z) (x ^ y ^ z)
-
- P( A, B, C, D, 5, 4, 0xFFFA3942 );
- P( D, A, B, C, 8, 11, 0x8771F681 );
- P( C, D, A, B, 11, 16, 0x6D9D6122 );
- P( B, C, D, A, 14, 23, 0xFDE5380C );
- P( A, B, C, D, 1, 4, 0xA4BEEA44 );
- P( D, A, B, C, 4, 11, 0x4BDECFA9 );
- P( C, D, A, B, 7, 16, 0xF6BB4B60 );
- P( B, C, D, A, 10, 23, 0xBEBFBC70 );
- P( A, B, C, D, 13, 4, 0x289B7EC6 );
- P( D, A, B, C, 0, 11, 0xEAA127FA );
- P( C, D, A, B, 3, 16, 0xD4EF3085 );
- P( B, C, D, A, 6, 23, 0x04881D05 );
- P( A, B, C, D, 9, 4, 0xD9D4D039 );
- P( D, A, B, C, 12, 11, 0xE6DB99E5 );
- P( C, D, A, B, 15, 16, 0x1FA27CF8 );
- P( B, C, D, A, 2, 23, 0xC4AC5665 );
-
-#undef F
- /* Round 4 */
-#define F(x,y,z) (y ^ (x | ~z))
-
- P( A, B, C, D, 0, 6, 0xF4292244 );
- P( D, A, B, C, 7, 10, 0x432AFF97 );
- P( C, D, A, B, 14, 15, 0xAB9423A7 );
- P( B, C, D, A, 5, 21, 0xFC93A039 );
- P( A, B, C, D, 12, 6, 0x655B59C3 );
- P( D, A, B, C, 3, 10, 0x8F0CCC92 );
- P( C, D, A, B, 10, 15, 0xFFEFF47D );
- P( B, C, D, A, 1, 21, 0x85845DD1 );
- P( A, B, C, D, 8, 6, 0x6FA87E4F );
- P( D, A, B, C, 15, 10, 0xFE2CE6E0 );
- P( C, D, A, B, 6, 15, 0xA3014314 );
- P( B, C, D, A, 13, 21, 0x4E0811A1 );
- P( A, B, C, D, 4, 6, 0xF7537E82 );
- P( D, A, B, C, 11, 10, 0xBD3AF235 );
- P( C, D, A, B, 2, 15, 0x2AD7D2BB );
- P( B, C, D, A, 9, 21, 0xEB86D391 );
- /* Only F is removed here; S and P stay defined for the rest of the file. */
-#undef F
- /* Add this block's result into the running state.
- * NOTE(review): where unsigned long is 64 bits, A..D and ctx->state pick up bits above 31.
- * md5_finish() emits only the low four bytes of each word through PUT_ULONG_LE, which
- * looks like what keeps the digest correct - confirm. */
- ctx->state[0] += A;
- ctx->state[1] += B;
- ctx->state[2] += C;
- ctx->state[3] += D;
-}
-
-/*
- * MD5 process buffer
- *
- * Adds ilen bytes from input to the running hash. Whole 64-byte blocks
- * go through md5_process(); any remainder is stored in ctx->buffer for
- * the next call. Returns immediately when ilen <= 0.
- */
-void md5_update( md5_context *ctx, unsigned char *input, int ilen )
-{
- int fill;
- unsigned long left;
-
- if( ilen <= 0 )
- return;
- /* left: bytes already waiting in ctx->buffer; fill: bytes needed to complete that block */
- left = ctx->total[0] & 0x3F;
- fill = 64 - left;
- /* advance the byte counter, keeping total[0] within 32 bits */
- ctx->total[0] += ilen;
- ctx->total[0] &= 0xFFFFFFFF;
- /* a sum smaller than ilen means total[0] wrapped: count the wrap in total[1] */
- if( ctx->total[0] < (unsigned long) ilen )
- ctx->total[1]++;
- /* complete and process the buffered partial block first, if the input can fill it */
- if( left && ilen >= fill )
- {
- memcpy( (void *) (ctx->buffer + left),
- (void *) input, fill );
- md5_process( ctx, ctx->buffer );
- input += fill;
- ilen -= fill;
- left = 0;
- }
- /* whole blocks are hashed straight from the caller's buffer, without copying */
- while( ilen >= 64 )
- {
- md5_process( ctx, input );
- input += 64;
- ilen -= 64;
- }
- /* stash the tail (fewer than 64 bytes) after whatever is still buffered */
- if( ilen > 0 )
- {
- memcpy( (void *) (ctx->buffer + left),
- (void *) input, ilen );
- }
-}
-
-static const unsigned char md5_padding[64] = /* padding source for md5_finish(): one 0x80 byte, then 63 zero bytes */
-{
- 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-/*
- * MD5 final digest
- *
- * Pads the message, appends its length in bits as eight bytes (least
- * significant first), and writes the four state words to output[0..15],
- * least significant byte first. The context is advanced by the padding
- * and is not reset.
- */
-void md5_finish( md5_context *ctx, unsigned char output[16] )
-{
- unsigned long last, padn;
- unsigned long high, low;
- unsigned char msglen[8];
- /* bit count = byte count * 8; taken now because the md5_update() calls below advance ctx->total */
- high = ( ctx->total[0] >> 29 )
- | ( ctx->total[1] << 3 );
- low = ( ctx->total[0] << 3 );
- /* PUT_ULONG_LE stores the low four bytes of each word */
- PUT_ULONG_LE( low, msglen, 0 );
- PUT_ULONG_LE( high, msglen, 4 );
- /* pad up to byte 56 of a block (spilling into one more block if needed), leaving 8 bytes for msglen */
- last = ctx->total[0] & 0x3F;
- padn = ( last < 56 ) ? ( 56 - last ) : ( 120 - last );
- /* md5_padding is const; the cast only satisfies md5_update()'s non-const parameter */
- md5_update( ctx, (unsigned char *) md5_padding, padn );
- md5_update( ctx, msglen, 8 );
-
- PUT_ULONG_LE( ctx->state[0], output, 0 );
- PUT_ULONG_LE( ctx->state[1], output, 4 );
- PUT_ULONG_LE( ctx->state[2], output, 8 );
- PUT_ULONG_LE( ctx->state[3], output, 12 );
-}
-
-/*
- * output = MD5( input buffer )
- *
- * One-shot helper: hashes ilen bytes of input with a context local to
- * this call and writes the 16-byte digest to output. The context is
- * overwritten with zeros before returning.
- */
-void md5( unsigned char *input, int ilen, unsigned char output[16] )
-{
- md5_context ctx;
-
- md5_starts( &ctx );
- md5_update( &ctx, input, ilen );
- md5_finish( &ctx, output );
-
- memset( &ctx, 0, sizeof( md5_context ) );
-}
-
-unsigned int md5hash ( const void * input, int len, unsigned int /*seed*/ )
-{ /* Returns the first unsigned int of the MD5 digest of the len bytes at input; the seed argument is ignored. */
- unsigned int hash[4];
- /* md5() writes its 16 digest bytes into the storage of hash[].
- * NOTE(review): assumes unsigned int is 4 bytes so hash[4] spans the whole digest - confirm.
- * The returned value is those bytes read back as an unsigned int, so it follows host byte order. */
- md5((unsigned char *)input,len,(unsigned char *)hash);
-
- // Disabled alternative that folds all four words together:
- //return hash[0] ^ hash[1] ^ hash[2] ^ hash[3];
-
- return hash[0];
-}
-
-void md5_32 ( const void * key, int len, uint32_t /*seed*/, void * out )
-{
- unsigned int hash[4];
-
- md5((unsigned char*)key,len,(unsigned char*)hash);
-
- *(uint32_t*)out = hash[0];
+#include <memory.h> +#include "Types.h" + +// "Derived from the RSA Data Security, Inc. MD5 Message Digest Algorithm" + +/** + * \brief MD5 context structure + */ +typedef struct +{ + unsigned long total[2]; /*!< number of bytes processed */ + unsigned long state[4]; /*!< intermediate digest state */ + unsigned char buffer[64]; /*!< data block being processed */ + + unsigned char ipad[64]; /*!< HMAC: inner padding */ + unsigned char opad[64]; /*!< HMAC: outer padding */ +} +md5_context; + +/** + * \brief MD5 context setup + * + * \param ctx context to be initialized + */ +void md5_starts( md5_context *ctx ); + +/** + * \brief MD5 process buffer + * + * \param ctx MD5 context + * \param input buffer holding the data + * \param ilen length of the input data + */ +void md5_update( md5_context *ctx, unsigned char *input, int ilen ); + +/** + * \brief MD5 final digest + * + * \param ctx MD5 context + * \param output MD5 checksum result + */ +void md5_finish( md5_context *ctx, unsigned char output[16] ); + +/** + * \brief Output = MD5( input buffer ) + * + * \param input buffer holding the data + * \param ilen length of the input data + * \param output MD5 checksum result + */ +void md5( unsigned char *input, int ilen, unsigned char output[16] ); + +/** + * \brief Output = MD5( file contents ) + * + * \param path input file name + * \param output MD5 checksum result + * + * \return 0 if successful, 1 if fopen failed, + * or 2 if fread failed + */ +int md5_file( char *path, unsigned char output[16] ); + +/** + * \brief MD5 HMAC context setup + * + * \param ctx HMAC context to be initialized + * \param key HMAC secret key + * \param keylen length of the HMAC key + */ +void md5_hmac_starts( md5_context *ctx, unsigned char *key, int keylen ); + +/** + * \brief MD5 HMAC process buffer + * + * \param ctx HMAC context + * \param input buffer holding the data + * \param ilen length of the input data + */ +void md5_hmac_update( md5_context *ctx, unsigned char *input, int ilen ); 
+ +/** + * \brief MD5 HMAC final digest + * + * \param ctx HMAC context + * \param output MD5 HMAC checksum result + */ +void md5_hmac_finish( md5_context *ctx, unsigned char output[16] ); + +/** + * \brief Output = HMAC-MD5( hmac key, input buffer ) + * + * \param key HMAC secret key + * \param keylen length of the HMAC key + * \param input buffer holding the data + * \param ilen length of the input data + * \param output HMAC-MD5 result + */ +void md5_hmac( unsigned char *key, int keylen, + unsigned char *input, int ilen, + unsigned char output[16] ); + +/** + * \brief Checkup routine + * + * \return 0 if successful, or 1 if the test failed + */ +int md5_self_test( int verbose ); + +/* + * 32-bit integer manipulation macros (little endian) + */ +#ifndef GET_ULONG_LE +#define GET_ULONG_LE(n,b,i) \ +{ \ + (n) = ( (unsigned long) (b)[(i) ] ) \ + | ( (unsigned long) (b)[(i) + 1] << 8 ) \ + | ( (unsigned long) (b)[(i) + 2] << 16 ) \ + | ( (unsigned long) (b)[(i) + 3] << 24 ); \ +} +#endif + +#ifndef PUT_ULONG_LE +#define PUT_ULONG_LE(n,b,i) \ +{ \ + (b)[(i) ] = (unsigned char) ( (n) ); \ + (b)[(i) + 1] = (unsigned char) ( (n) >> 8 ); \ + (b)[(i) + 2] = (unsigned char) ( (n) >> 16 ); \ + (b)[(i) + 3] = (unsigned char) ( (n) >> 24 ); \ +} +#endif + +/* + * MD5 context setup + */ +void md5_starts( md5_context *ctx ) +{ + ctx->total[0] = 0; + ctx->total[1] = 0; + + ctx->state[0] = 0x67452301; + ctx->state[1] = 0xEFCDAB89; + ctx->state[2] = 0x98BADCFE; + ctx->state[3] = 0x10325476; +} + +static void md5_process( md5_context *ctx, unsigned char data[64] ) +{ + unsigned long X[16], A, B, C, D; + + GET_ULONG_LE( X[ 0], data, 0 ); + GET_ULONG_LE( X[ 1], data, 4 ); + GET_ULONG_LE( X[ 2], data, 8 ); + GET_ULONG_LE( X[ 3], data, 12 ); + GET_ULONG_LE( X[ 4], data, 16 ); + GET_ULONG_LE( X[ 5], data, 20 ); + GET_ULONG_LE( X[ 6], data, 24 ); + GET_ULONG_LE( X[ 7], data, 28 ); + GET_ULONG_LE( X[ 8], data, 32 ); + GET_ULONG_LE( X[ 9], data, 36 ); + GET_ULONG_LE( X[10], data, 40 ); + 
GET_ULONG_LE( X[11], data, 44 ); + GET_ULONG_LE( X[12], data, 48 ); + GET_ULONG_LE( X[13], data, 52 ); + GET_ULONG_LE( X[14], data, 56 ); + GET_ULONG_LE( X[15], data, 60 ); + +#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n))) + +#define P(a,b,c,d,k,s,t) \ +{ \ + a += F(b,c,d) + X[k] + t; a = S(a,s) + b; \ +} + + A = ctx->state[0]; + B = ctx->state[1]; + C = ctx->state[2]; + D = ctx->state[3]; + +#define F(x,y,z) (z ^ (x & (y ^ z))) + + P( A, B, C, D, 0, 7, 0xD76AA478 ); + P( D, A, B, C, 1, 12, 0xE8C7B756 ); + P( C, D, A, B, 2, 17, 0x242070DB ); + P( B, C, D, A, 3, 22, 0xC1BDCEEE ); + P( A, B, C, D, 4, 7, 0xF57C0FAF ); + P( D, A, B, C, 5, 12, 0x4787C62A ); + P( C, D, A, B, 6, 17, 0xA8304613 ); + P( B, C, D, A, 7, 22, 0xFD469501 ); + P( A, B, C, D, 8, 7, 0x698098D8 ); + P( D, A, B, C, 9, 12, 0x8B44F7AF ); + P( C, D, A, B, 10, 17, 0xFFFF5BB1 ); + P( B, C, D, A, 11, 22, 0x895CD7BE ); + P( A, B, C, D, 12, 7, 0x6B901122 ); + P( D, A, B, C, 13, 12, 0xFD987193 ); + P( C, D, A, B, 14, 17, 0xA679438E ); + P( B, C, D, A, 15, 22, 0x49B40821 ); + +#undef F + +#define F(x,y,z) (y ^ (z & (x ^ y))) + + P( A, B, C, D, 1, 5, 0xF61E2562 ); + P( D, A, B, C, 6, 9, 0xC040B340 ); + P( C, D, A, B, 11, 14, 0x265E5A51 ); + P( B, C, D, A, 0, 20, 0xE9B6C7AA ); + P( A, B, C, D, 5, 5, 0xD62F105D ); + P( D, A, B, C, 10, 9, 0x02441453 ); + P( C, D, A, B, 15, 14, 0xD8A1E681 ); + P( B, C, D, A, 4, 20, 0xE7D3FBC8 ); + P( A, B, C, D, 9, 5, 0x21E1CDE6 ); + P( D, A, B, C, 14, 9, 0xC33707D6 ); + P( C, D, A, B, 3, 14, 0xF4D50D87 ); + P( B, C, D, A, 8, 20, 0x455A14ED ); + P( A, B, C, D, 13, 5, 0xA9E3E905 ); + P( D, A, B, C, 2, 9, 0xFCEFA3F8 ); + P( C, D, A, B, 7, 14, 0x676F02D9 ); + P( B, C, D, A, 12, 20, 0x8D2A4C8A ); + +#undef F + +#define F(x,y,z) (x ^ y ^ z) + + P( A, B, C, D, 5, 4, 0xFFFA3942 ); + P( D, A, B, C, 8, 11, 0x8771F681 ); + P( C, D, A, B, 11, 16, 0x6D9D6122 ); + P( B, C, D, A, 14, 23, 0xFDE5380C ); + P( A, B, C, D, 1, 4, 0xA4BEEA44 ); + P( D, A, B, C, 4, 11, 0x4BDECFA9 ); + P( C, 
D, A, B, 7, 16, 0xF6BB4B60 ); + P( B, C, D, A, 10, 23, 0xBEBFBC70 ); + P( A, B, C, D, 13, 4, 0x289B7EC6 ); + P( D, A, B, C, 0, 11, 0xEAA127FA ); + P( C, D, A, B, 3, 16, 0xD4EF3085 ); + P( B, C, D, A, 6, 23, 0x04881D05 ); + P( A, B, C, D, 9, 4, 0xD9D4D039 ); + P( D, A, B, C, 12, 11, 0xE6DB99E5 ); + P( C, D, A, B, 15, 16, 0x1FA27CF8 ); + P( B, C, D, A, 2, 23, 0xC4AC5665 ); + +#undef F + +#define F(x,y,z) (y ^ (x | ~z)) + + P( A, B, C, D, 0, 6, 0xF4292244 ); + P( D, A, B, C, 7, 10, 0x432AFF97 ); + P( C, D, A, B, 14, 15, 0xAB9423A7 ); + P( B, C, D, A, 5, 21, 0xFC93A039 ); + P( A, B, C, D, 12, 6, 0x655B59C3 ); + P( D, A, B, C, 3, 10, 0x8F0CCC92 ); + P( C, D, A, B, 10, 15, 0xFFEFF47D ); + P( B, C, D, A, 1, 21, 0x85845DD1 ); + P( A, B, C, D, 8, 6, 0x6FA87E4F ); + P( D, A, B, C, 15, 10, 0xFE2CE6E0 ); + P( C, D, A, B, 6, 15, 0xA3014314 ); + P( B, C, D, A, 13, 21, 0x4E0811A1 ); + P( A, B, C, D, 4, 6, 0xF7537E82 ); + P( D, A, B, C, 11, 10, 0xBD3AF235 ); + P( C, D, A, B, 2, 15, 0x2AD7D2BB ); + P( B, C, D, A, 9, 21, 0xEB86D391 ); + +#undef F + + ctx->state[0] += A; + ctx->state[1] += B; + ctx->state[2] += C; + ctx->state[3] += D; +} + +/* + * MD5 process buffer + */ +void md5_update( md5_context *ctx, unsigned char *input, int ilen ) +{ + int fill; + unsigned long left; + + if( ilen <= 0 ) + return; + + left = ctx->total[0] & 0x3F; + fill = 64 - left; + + ctx->total[0] += ilen; + ctx->total[0] &= 0xFFFFFFFF; + + if( ctx->total[0] < (unsigned long) ilen ) + ctx->total[1]++; + + if( left && ilen >= fill ) + { + memcpy( (void *) (ctx->buffer + left), + (void *) input, fill ); + md5_process( ctx, ctx->buffer ); + input += fill; + ilen -= fill; + left = 0; + } + + while( ilen >= 64 ) + { + md5_process( ctx, input ); + input += 64; + ilen -= 64; + } + + if( ilen > 0 ) + { + memcpy( (void *) (ctx->buffer + left), + (void *) input, ilen ); + } +} + +static const unsigned char md5_padding[64] = +{ + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* + * MD5 final digest + */ +void md5_finish( md5_context *ctx, unsigned char output[16] ) +{ + unsigned long last, padn; + unsigned long high, low; + unsigned char msglen[8]; + + high = ( ctx->total[0] >> 29 ) + | ( ctx->total[1] << 3 ); + low = ( ctx->total[0] << 3 ); + + PUT_ULONG_LE( low, msglen, 0 ); + PUT_ULONG_LE( high, msglen, 4 ); + + last = ctx->total[0] & 0x3F; + padn = ( last < 56 ) ? ( 56 - last ) : ( 120 - last ); + + md5_update( ctx, (unsigned char *) md5_padding, padn ); + md5_update( ctx, msglen, 8 ); + + PUT_ULONG_LE( ctx->state[0], output, 0 ); + PUT_ULONG_LE( ctx->state[1], output, 4 ); + PUT_ULONG_LE( ctx->state[2], output, 8 ); + PUT_ULONG_LE( ctx->state[3], output, 12 ); +} + +/* + * output = MD5( input buffer ) + */ +void md5( unsigned char *input, int ilen, unsigned char output[16] ) +{ + md5_context ctx; + + md5_starts( &ctx ); + md5_update( &ctx, input, ilen ); + md5_finish( &ctx, output ); + + memset( &ctx, 0, sizeof( md5_context ) ); +} + +unsigned int md5hash ( const void * input, int len, unsigned int /*seed*/ ) +{ + unsigned int hash[4]; + + md5((unsigned char *)input,len,(unsigned char *)hash); + + //return hash[0] ^ hash[1] ^ hash[2] ^ hash[3]; + + return hash[0]; +} + +void md5_32 ( const void * key, int len, uint32_t /*seed*/, void * out ) +{ + unsigned int hash[4]; + + md5((unsigned char*)key,len,(unsigned char*)hash); + + *(uint32_t*)out = hash[0]; }
\ No newline at end of file @@ -1,799 +1,799 @@ -/* A portable stdint.h
- ****************************************************************************
- * BSD License:
- ****************************************************************************
- *
- * Copyright (c) 2005-2007 Paul Hsieh
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- ****************************************************************************
- *
- * Version 0.1.11
- *
- * The ANSI C standard committee, for the C99 standard, specified the
- * inclusion of a new standard include file called stdint.h. This is
- * a very useful and long desired include file which contains several
- * very precise definitions for integer scalar types that is
- * critically important for making portable several classes of
- * applications including cryptography, hashing, variable length
- * integer libraries and so on. But for most developers its likely
- * useful just for programming sanity.
- *
- * The problem is that most compiler vendors have decided not to
- * implement the C99 standard, and the next C++ language standard
- * (which has a lot more mindshare these days) will be a long time in
- * coming and its unknown whether or not it will include stdint.h or
- * how much adoption it will have. Either way, it will be a long time
- * before all compilers come with a stdint.h and it also does nothing
- * for the extremely large number of compilers available today which
- * do not include this file, or anything comparable to it.
- *
- * So that's what this file is all about. Its an attempt to build a
- * single universal include file that works on as many platforms as
- * possible to deliver what stdint.h is supposed to. A few things
- * that should be noted about this file:
- *
- * 1) It is not guaranteed to be portable and/or present an identical
- * interface on all platforms. The extreme variability of the
- * ANSI C standard makes this an impossibility right from the
- * very get go. Its really only meant to be useful for the vast
- * majority of platforms that possess the capability of
- * implementing usefully and precisely defined, standard sized
- * integer scalars. Systems which are not intrinsically 2s
- * complement may produce invalid constants.
- *
- * 2) There is an unavoidable use of non-reserved symbols.
- *
- * 3) Other standard include files are invoked.
- *
- * 4) This file may come in conflict with future platforms that do
- * include stdint.h. The hope is that one or the other can be
- * used with no real difference.
- *
- * 5) In the current verison, if your platform can't represent
- * int32_t, int16_t and int8_t, it just dumps out with a compiler
- * error.
- *
- * 6) 64 bit integers may or may not be defined. Test for their
- * presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX.
- * Note that this is different from the C99 specification which
- * requires the existence of 64 bit support in the compiler. If
- * this is not defined for your platform, yet it is capable of
- * dealing with 64 bits then it is because this file has not yet
- * been extended to cover all of your system's capabilities.
- *
- * 7) (u)intptr_t may or may not be defined. Test for its presence
- * with the test: #ifdef PTRDIFF_MAX. If this is not defined
- * for your platform, then it is because this file has not yet
- * been extended to cover all of your system's capabilities, not
- * because its optional.
- *
- * 8) The following might not been defined even if your platform is
- * capable of defining it:
- *
- * WCHAR_MIN
- * WCHAR_MAX
- * (u)int64_t
- * PTRDIFF_MIN
- * PTRDIFF_MAX
- * (u)intptr_t
- *
- * 9) The following have not been defined:
- *
- * WINT_MIN
- * WINT_MAX
- *
- * 10) The criteria for defining (u)int_least(*)_t isn't clear,
- * except for systems which don't have a type that precisely
- * defined 8, 16, or 32 bit types (which this include file does
- * not support anyways). Default definitions have been given.
- *
- * 11) The criteria for defining (u)int_fast(*)_t isn't something I
- * would trust to any particular compiler vendor or the ANSI C
- * committee. It is well known that "compatible systems" are
- * commonly created that have very different performance
- * characteristics from the systems they are compatible with,
- * especially those whose vendors make both the compiler and the
- * system. Default definitions have been given, but its strongly
- * recommended that users never use these definitions for any
- * reason (they do *NOT* deliver any serious guarantee of
- * improved performance -- not in this file, nor any vendor's
- * stdint.h).
- *
- * 12) The following macros:
- *
- * PRINTF_INTMAX_MODIFIER
- * PRINTF_INT64_MODIFIER
- * PRINTF_INT32_MODIFIER
- * PRINTF_INT16_MODIFIER
- * PRINTF_LEAST64_MODIFIER
- * PRINTF_LEAST32_MODIFIER
- * PRINTF_LEAST16_MODIFIER
- * PRINTF_INTPTR_MODIFIER
- *
- * are strings which have been defined as the modifiers required
- * for the "d", "u" and "x" printf formats to correctly output
- * (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t,
- * (u)least32_t, (u)least16_t and (u)intptr_t types respectively.
- * PRINTF_INTPTR_MODIFIER is not defined for some systems which
- * provide their own stdint.h. PRINTF_INT64_MODIFIER is not
- * defined if INT64_MAX is not defined. These are an extension
- * beyond what C99 specifies must be in stdint.h.
- *
- * In addition, the following macros are defined:
- *
- * PRINTF_INTMAX_HEX_WIDTH
- * PRINTF_INT64_HEX_WIDTH
- * PRINTF_INT32_HEX_WIDTH
- * PRINTF_INT16_HEX_WIDTH
- * PRINTF_INT8_HEX_WIDTH
- * PRINTF_INTMAX_DEC_WIDTH
- * PRINTF_INT64_DEC_WIDTH
- * PRINTF_INT32_DEC_WIDTH
- * PRINTF_INT16_DEC_WIDTH
- * PRINTF_INT8_DEC_WIDTH
- *
- * Which specifies the maximum number of characters required to
- * print the number of that type in either hexadecimal or decimal.
- * These are an extension beyond what C99 specifies must be in
- * stdint.h.
- *
- * Compilers tested (all with 0 warnings at their highest respective
- * settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32
- * bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio
- * .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3
- *
- * This file should be considered a work in progress. Suggestions for
- * improvements, especially those which increase coverage are strongly
- * encouraged.
- *
- * Acknowledgements
- *
- * The following people have made significant contributions to the
- * development and testing of this file:
- *
- * Chris Howie
- * John Steele Scott
- * Dave Thorup
- *
- */
-
-#include <stddef.h>
-#include <limits.h>
-#include <signal.h>
-
-/*
- * For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and
- * do nothing else. On the Mac OS X version of gcc this is _STDINT_H_.
- */
-
-#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) )) && !defined (_PSTDINT_H_INCLUDED)
-#include <stdint.h>
-#define _PSTDINT_H_INCLUDED
-# ifndef PRINTF_INT64_MODIFIER
-# define PRINTF_INT64_MODIFIER "ll"
-# endif
-# ifndef PRINTF_INT32_MODIFIER
-# define PRINTF_INT32_MODIFIER "l"
-# endif
-# ifndef PRINTF_INT16_MODIFIER
-# define PRINTF_INT16_MODIFIER "h"
-# endif
-# ifndef PRINTF_INTMAX_MODIFIER
-# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
-# endif
-# ifndef PRINTF_INT64_HEX_WIDTH
-# define PRINTF_INT64_HEX_WIDTH "16"
-# endif
-# ifndef PRINTF_INT32_HEX_WIDTH
-# define PRINTF_INT32_HEX_WIDTH "8"
-# endif
-# ifndef PRINTF_INT16_HEX_WIDTH
-# define PRINTF_INT16_HEX_WIDTH "4"
-# endif
-# ifndef PRINTF_INT8_HEX_WIDTH
-# define PRINTF_INT8_HEX_WIDTH "2"
-# endif
-# ifndef PRINTF_INT64_DEC_WIDTH
-# define PRINTF_INT64_DEC_WIDTH "20"
-# endif
-# ifndef PRINTF_INT32_DEC_WIDTH
-# define PRINTF_INT32_DEC_WIDTH "10"
-# endif
-# ifndef PRINTF_INT16_DEC_WIDTH
-# define PRINTF_INT16_DEC_WIDTH "5"
-# endif
-# ifndef PRINTF_INT8_DEC_WIDTH
-# define PRINTF_INT8_DEC_WIDTH "3"
-# endif
-# ifndef PRINTF_INTMAX_HEX_WIDTH
-# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
-# endif
-# ifndef PRINTF_INTMAX_DEC_WIDTH
-# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
-# endif
-
-/*
- * Something really weird is going on with Open Watcom. Just pull some of
- * these duplicated definitions from Open Watcom's stdint.h file for now.
- */
-
-# if defined (__WATCOMC__) && __WATCOMC__ >= 1250
-# if !defined (INT64_C)
-# define INT64_C(x) (x + (INT64_MAX - INT64_MAX))
-# endif
-# if !defined (UINT64_C)
-# define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX))
-# endif
-# if !defined (INT32_C)
-# define INT32_C(x) (x + (INT32_MAX - INT32_MAX))
-# endif
-# if !defined (UINT32_C)
-# define UINT32_C(x) (x + (UINT32_MAX - UINT32_MAX))
-# endif
-# if !defined (INT16_C)
-# define INT16_C(x) (x)
-# endif
-# if !defined (UINT16_C)
-# define UINT16_C(x) (x)
-# endif
-# if !defined (INT8_C)
-# define INT8_C(x) (x)
-# endif
-# if !defined (UINT8_C)
-# define UINT8_C(x) (x)
-# endif
-# if !defined (UINT64_MAX)
-# define UINT64_MAX 18446744073709551615ULL
-# endif
-# if !defined (INT64_MAX)
-# define INT64_MAX 9223372036854775807LL
-# endif
-# if !defined (UINT32_MAX)
-# define UINT32_MAX 4294967295UL
-# endif
-# if !defined (INT32_MAX)
-# define INT32_MAX 2147483647L
-# endif
-# if !defined (INTMAX_MAX)
-# define INTMAX_MAX INT64_MAX
-# endif
-# if !defined (INTMAX_MIN)
-# define INTMAX_MIN INT64_MIN
-# endif
-# endif
-#endif
-
-#ifndef _PSTDINT_H_INCLUDED
-#define _PSTDINT_H_INCLUDED
-
-#ifndef SIZE_MAX
-# define SIZE_MAX (~(size_t)0)
-#endif
-
-/*
- * Deduce the type assignments from limits.h under the assumption that
- * integer sizes in bits are powers of 2, and follow the ANSI
- * definitions.
- */
-
-#ifndef UINT8_MAX
-# define UINT8_MAX 0xff
-#endif
-#ifndef uint8_t
-# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S)
- typedef unsigned char uint8_t;
-# define UINT8_C(v) ((uint8_t) v)
-# else
-# error "Platform not supported"
-# endif
-#endif
-
-#ifndef INT8_MAX
-# define INT8_MAX 0x7f
-#endif
-#ifndef INT8_MIN
-# define INT8_MIN INT8_C(0x80)
-#endif
-#ifndef int8_t
-# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S)
- typedef signed char int8_t;
-# define INT8_C(v) ((int8_t) v)
-# else
-# error "Platform not supported"
-# endif
-#endif
-
-#ifndef UINT16_MAX
-# define UINT16_MAX 0xffff
-#endif
-#ifndef uint16_t
-#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S)
- typedef unsigned int uint16_t;
-# ifndef PRINTF_INT16_MODIFIER
-# define PRINTF_INT16_MODIFIER ""
-# endif
-# define UINT16_C(v) ((uint16_t) (v))
-#elif (USHRT_MAX == UINT16_MAX)
- typedef unsigned short uint16_t;
-# define UINT16_C(v) ((uint16_t) (v))
-# ifndef PRINTF_INT16_MODIFIER
-# define PRINTF_INT16_MODIFIER "h"
-# endif
-#else
-#error "Platform not supported"
-#endif
-#endif
-
-#ifndef INT16_MAX
-# define INT16_MAX 0x7fff
-#endif
-#ifndef INT16_MIN
-# define INT16_MIN INT16_C(0x8000)
-#endif
-#ifndef int16_t
-#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S)
- typedef signed int int16_t;
-# define INT16_C(v) ((int16_t) (v))
-# ifndef PRINTF_INT16_MODIFIER
-# define PRINTF_INT16_MODIFIER ""
-# endif
-#elif (SHRT_MAX == INT16_MAX)
- typedef signed short int16_t;
-# define INT16_C(v) ((int16_t) (v))
-# ifndef PRINTF_INT16_MODIFIER
-# define PRINTF_INT16_MODIFIER "h"
-# endif
-#else
-#error "Platform not supported"
-#endif
-#endif
-
-#ifndef UINT32_MAX
-# define UINT32_MAX (0xffffffffUL)
-#endif
-#ifndef uint32_t
-#if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S)
- typedef unsigned long uint32_t;
-# define UINT32_C(v) v ## UL
-# ifndef PRINTF_INT32_MODIFIER
-# define PRINTF_INT32_MODIFIER "l"
-# endif
-#elif (UINT_MAX == UINT32_MAX)
- typedef unsigned int uint32_t;
-# ifndef PRINTF_INT32_MODIFIER
-# define PRINTF_INT32_MODIFIER ""
-# endif
-# define UINT32_C(v) v ## U
-#elif (USHRT_MAX == UINT32_MAX)
- typedef unsigned short uint32_t;
-# define UINT32_C(v) ((unsigned short) (v))
-# ifndef PRINTF_INT32_MODIFIER
-# define PRINTF_INT32_MODIFIER ""
-# endif
-#else
-#error "Platform not supported"
-#endif
-#endif
-
-#ifndef INT32_MAX
-# define INT32_MAX (0x7fffffffL)
-#endif
-#ifndef INT32_MIN
-# define INT32_MIN INT32_C(0x80000000)
-#endif
-#ifndef int32_t
-#if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S)
- typedef signed long int32_t;
-# define INT32_C(v) v ## L
-# ifndef PRINTF_INT32_MODIFIER
-# define PRINTF_INT32_MODIFIER "l"
-# endif
-#elif (INT_MAX == INT32_MAX)
- typedef signed int int32_t;
-# define INT32_C(v) v
-# ifndef PRINTF_INT32_MODIFIER
-# define PRINTF_INT32_MODIFIER ""
-# endif
-#elif (SHRT_MAX == INT32_MAX)
- typedef signed short int32_t;
-# define INT32_C(v) ((short) (v))
-# ifndef PRINTF_INT32_MODIFIER
-# define PRINTF_INT32_MODIFIER ""
-# endif
-#else
-#error "Platform not supported"
-#endif
-#endif
-
-/*
- * The macro stdint_int64_defined is temporarily used to record
- * whether or not 64 integer support is available. It must be
- * defined for any 64 integer extensions for new platforms that are
- * added.
- */
-
-#undef stdint_int64_defined
-#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S)
-# if (__STDC__ && __STDC_VERSION >= 199901L) || defined (S_SPLINT_S)
-# define stdint_int64_defined
- typedef long long int64_t;
- typedef unsigned long long uint64_t;
-# define UINT64_C(v) v ## ULL
-# define INT64_C(v) v ## LL
-# ifndef PRINTF_INT64_MODIFIER
-# define PRINTF_INT64_MODIFIER "ll"
-# endif
-# endif
-#endif
-
-#if !defined (stdint_int64_defined)
-# if defined(__GNUC__)
-# define stdint_int64_defined
- __extension__ typedef long long int64_t;
- __extension__ typedef unsigned long long uint64_t;
-# define UINT64_C(v) v ## ULL
-# define INT64_C(v) v ## LL
-# ifndef PRINTF_INT64_MODIFIER
-# define PRINTF_INT64_MODIFIER "ll"
-# endif
-# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S)
-# define stdint_int64_defined
- typedef long long int64_t;
- typedef unsigned long long uint64_t;
-# define UINT64_C(v) v ## ULL
-# define INT64_C(v) v ## LL
-# ifndef PRINTF_INT64_MODIFIER
-# define PRINTF_INT64_MODIFIER "ll"
-# endif
-# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC)
-# define stdint_int64_defined
- typedef __int64 int64_t;
- typedef unsigned __int64 uint64_t;
-# define UINT64_C(v) v ## UI64
-# define INT64_C(v) v ## I64
-# ifndef PRINTF_INT64_MODIFIER
-# define PRINTF_INT64_MODIFIER "I64"
-# endif
-# endif
-#endif
-
-#if !defined (LONG_LONG_MAX) && defined (INT64_C)
-# define LONG_LONG_MAX INT64_C (9223372036854775807)
-#endif
-#ifndef ULONG_LONG_MAX
-# define ULONG_LONG_MAX UINT64_C (18446744073709551615)
-#endif
-
-#if !defined (INT64_MAX) && defined (INT64_C)
-# define INT64_MAX INT64_C (9223372036854775807)
-#endif
-#if !defined (INT64_MIN) && defined (INT64_C)
-# define INT64_MIN INT64_C (-9223372036854775808)
-#endif
-#if !defined (UINT64_MAX) && defined (INT64_C)
-# define UINT64_MAX UINT64_C (18446744073709551615)
-#endif
-
-/*
- * Width of hexadecimal for number field.
- */
-
-#ifndef PRINTF_INT64_HEX_WIDTH
-# define PRINTF_INT64_HEX_WIDTH "16"
-#endif
-#ifndef PRINTF_INT32_HEX_WIDTH
-# define PRINTF_INT32_HEX_WIDTH "8"
-#endif
-#ifndef PRINTF_INT16_HEX_WIDTH
-# define PRINTF_INT16_HEX_WIDTH "4"
-#endif
-#ifndef PRINTF_INT8_HEX_WIDTH
-# define PRINTF_INT8_HEX_WIDTH "2"
-#endif
-
-#ifndef PRINTF_INT64_DEC_WIDTH
-# define PRINTF_INT64_DEC_WIDTH "20"
-#endif
-#ifndef PRINTF_INT32_DEC_WIDTH
-# define PRINTF_INT32_DEC_WIDTH "10"
-#endif
-#ifndef PRINTF_INT16_DEC_WIDTH
-# define PRINTF_INT16_DEC_WIDTH "5"
-#endif
-#ifndef PRINTF_INT8_DEC_WIDTH
-# define PRINTF_INT8_DEC_WIDTH "3"
-#endif
-
-/*
- * Ok, lets not worry about 128 bit integers for now. Moore's law says
- * we don't need to worry about that until about 2040 at which point
- * we'll have bigger things to worry about.
- */
-
-#ifdef stdint_int64_defined
- typedef int64_t intmax_t;
- typedef uint64_t uintmax_t;
-# define INTMAX_MAX INT64_MAX
-# define INTMAX_MIN INT64_MIN
-# define UINTMAX_MAX UINT64_MAX
-# define UINTMAX_C(v) UINT64_C(v)
-# define INTMAX_C(v) INT64_C(v)
-# ifndef PRINTF_INTMAX_MODIFIER
-# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
-# endif
-# ifndef PRINTF_INTMAX_HEX_WIDTH
-# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
-# endif
-# ifndef PRINTF_INTMAX_DEC_WIDTH
-# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
-# endif
-#else
- typedef int32_t intmax_t;
- typedef uint32_t uintmax_t;
-# define INTMAX_MAX INT32_MAX
-# define UINTMAX_MAX UINT32_MAX
-# define UINTMAX_C(v) UINT32_C(v)
-# define INTMAX_C(v) INT32_C(v)
-# ifndef PRINTF_INTMAX_MODIFIER
-# define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER
-# endif
-# ifndef PRINTF_INTMAX_HEX_WIDTH
-# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH
-# endif
-# ifndef PRINTF_INTMAX_DEC_WIDTH
-# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH
-# endif
-#endif
-
-/*
- * Because this file currently only supports platforms which have
- * precise powers of 2 as bit sizes for the default integers, the
- * least definitions are all trivial. Its possible that a future
- * version of this file could have different definitions.
- */
-
-#ifndef stdint_least_defined
- typedef int8_t int_least8_t;
- typedef uint8_t uint_least8_t;
- typedef int16_t int_least16_t;
- typedef uint16_t uint_least16_t;
- typedef int32_t int_least32_t;
- typedef uint32_t uint_least32_t;
-# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER
-# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER
-# define UINT_LEAST8_MAX UINT8_MAX
-# define INT_LEAST8_MAX INT8_MAX
-# define UINT_LEAST16_MAX UINT16_MAX
-# define INT_LEAST16_MAX INT16_MAX
-# define UINT_LEAST32_MAX UINT32_MAX
-# define INT_LEAST32_MAX INT32_MAX
-# define INT_LEAST8_MIN INT8_MIN
-# define INT_LEAST16_MIN INT16_MIN
-# define INT_LEAST32_MIN INT32_MIN
-# ifdef stdint_int64_defined
- typedef int64_t int_least64_t;
- typedef uint64_t uint_least64_t;
-# define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER
-# define UINT_LEAST64_MAX UINT64_MAX
-# define INT_LEAST64_MAX INT64_MAX
-# define INT_LEAST64_MIN INT64_MIN
-# endif
-#endif
-#undef stdint_least_defined
-
-/*
- * The ANSI C committee pretending to know or specify anything about
- * performance is the epitome of misguided arrogance. The mandate of
- * this file is to *ONLY* ever support that absolute minimum
- * definition of the fast integer types, for compatibility purposes.
- * No extensions, and no attempt to suggest what may or may not be a
- * faster integer type will ever be made in this file. Developers are
- * warned to stay away from these types when using this or any other
- * stdint.h.
- */
-
-typedef int_least8_t int_fast8_t;
-typedef uint_least8_t uint_fast8_t;
-typedef int_least16_t int_fast16_t;
-typedef uint_least16_t uint_fast16_t;
-typedef int_least32_t int_fast32_t;
-typedef uint_least32_t uint_fast32_t;
-#define UINT_FAST8_MAX UINT_LEAST8_MAX
-#define INT_FAST8_MAX INT_LEAST8_MAX
-#define UINT_FAST16_MAX UINT_LEAST16_MAX
-#define INT_FAST16_MAX INT_LEAST16_MAX
-#define UINT_FAST32_MAX UINT_LEAST32_MAX
-#define INT_FAST32_MAX INT_LEAST32_MAX
-#define INT_FAST8_MIN INT_LEAST8_MIN
-#define INT_FAST16_MIN INT_LEAST16_MIN
-#define INT_FAST32_MIN INT_LEAST32_MIN
-#ifdef stdint_int64_defined
- typedef int_least64_t int_fast64_t;
- typedef uint_least64_t uint_fast64_t;
-# define UINT_FAST64_MAX UINT_LEAST64_MAX
-# define INT_FAST64_MAX INT_LEAST64_MAX
-# define INT_FAST64_MIN INT_LEAST64_MIN
-#endif
-
-#undef stdint_int64_defined
-
-/*
- * Whatever piecemeal, per compiler thing we can do about the wchar_t
- * type limits.
- */
-
-#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__)
-# include <wchar.h>
-# ifndef WCHAR_MIN
-# define WCHAR_MIN 0
-# endif
-# ifndef WCHAR_MAX
-# define WCHAR_MAX ((wchar_t)-1)
-# endif
-#endif
-
-/*
- * Whatever piecemeal, per compiler/platform thing we can do about the
- * (u)intptr_t types and limits.
- */
-
-#if defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED)
-# define STDINT_H_UINTPTR_T_DEFINED
-#endif
-
-#ifndef STDINT_H_UINTPTR_T_DEFINED
-# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64)
-# define stdint_intptr_bits 64
-# elif defined (__WATCOMC__) || defined (__TURBOC__)
-# if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__)
-# define stdint_intptr_bits 16
-# else
-# define stdint_intptr_bits 32
-# endif
-# elif defined (__i386__) || defined (_WIN32) || defined (WIN32)
-# define stdint_intptr_bits 32
-# elif defined (__INTEL_COMPILER)
-/* TODO -- what will Intel do about x86-64? */
-# endif
-
-# ifdef stdint_intptr_bits
-# define stdint_intptr_glue3_i(a,b,c) a##b##c
-# define stdint_intptr_glue3(a,b,c) stdint_intptr_glue3_i(a,b,c)
-# ifndef PRINTF_INTPTR_MODIFIER
-# define PRINTF_INTPTR_MODIFIER stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER)
-# endif
-# ifndef PTRDIFF_MAX
-# define PTRDIFF_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
-# endif
-# ifndef PTRDIFF_MIN
-# define PTRDIFF_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
-# endif
-# ifndef UINTPTR_MAX
-# define UINTPTR_MAX stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX)
-# endif
-# ifndef INTPTR_MAX
-# define INTPTR_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
-# endif
-# ifndef INTPTR_MIN
-# define INTPTR_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
-# endif
-# ifndef INTPTR_C
-# define INTPTR_C(x) stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x)
-# endif
-# ifndef UINTPTR_C
-# define UINTPTR_C(x) stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x)
-# endif
- typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t;
- typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t) intptr_t;
-# else
-/* TODO -- This following is likely wrong for some platforms, and does
- nothing for the definition of uintptr_t. */
- typedef ptrdiff_t intptr_t;
-# endif
-# define STDINT_H_UINTPTR_T_DEFINED
-#endif
-
-/*
- * Assumes sig_atomic_t is signed and we have a 2s complement machine.
- */
-
-#ifndef SIG_ATOMIC_MAX
-# define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1)
-#endif
-
-#endif
-
-#if defined (__TEST_PSTDINT_FOR_CORRECTNESS)
-
-/*
- * Please compile with the maximum warning settings to make sure macros are not
- * defined more than once.
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#define glue3_aux(x,y,z) x ## y ## z
-#define glue3(x,y,z) glue3_aux(x,y,z)
-
-#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,=) glue3(UINT,bits,_C) (0);
-#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,=) glue3(INT,bits,_C) (0);
-
-#define DECL(us,bits) glue3(DECL,us,) (bits)
-
-#define TESTUMAX(bits) glue3(u,bits,=) glue3(~,u,bits); if (glue3(UINT,bits,_MAX) glue3(!=,u,bits)) printf ("Something wrong with UINT%d_MAX\n", bits)
-
-int main () {
- DECL(I,8)
- DECL(U,8)
- DECL(I,16)
- DECL(U,16)
- DECL(I,32)
- DECL(U,32)
-#ifdef INT64_MAX
- DECL(I,64)
- DECL(U,64)
-#endif
- intmax_t imax = INTMAX_C(0);
- uintmax_t umax = UINTMAX_C(0);
- char str0[256], str1[256];
-
- sprintf (str0, "%d %x\n", 0, ~0);
-
- sprintf (str1, "%d %x\n", i8, ~0);
- if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1);
- sprintf (str1, "%u %x\n", u8, ~0);
- if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1);
- sprintf (str1, "%d %x\n", i16, ~0);
- if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1);
- sprintf (str1, "%u %x\n", u16, ~0);
- if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1);
- sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n", i32, ~0);
- if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1);
- sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n", u32, ~0);
- if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1);
-#ifdef INT64_MAX
- sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n", i64, ~0);
- if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1);
-#endif
- sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n", imax, ~0);
- if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1);
- sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n", umax, ~0);
- if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1);
-
- TESTUMAX(8);
- TESTUMAX(16);
- TESTUMAX(32);
-#ifdef INT64_MAX
- TESTUMAX(64);
-#endif
-
- return EXIT_SUCCESS;
-}
-
-#endif
+/* A portable stdint.h + **************************************************************************** + * BSD License: + **************************************************************************** + * + * Copyright (c) 2005-2007 Paul Hsieh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************** + * + * Version 0.1.11 + * + * The ANSI C standard committee, for the C99 standard, specified the + * inclusion of a new standard include file called stdint.h. 
This is + * a very useful and long desired include file which contains several + * very precise definitions for integer scalar types that is + * critically important for making portable several classes of + * applications including cryptography, hashing, variable length + * integer libraries and so on. But for most developers its likely + * useful just for programming sanity. + * + * The problem is that most compiler vendors have decided not to + * implement the C99 standard, and the next C++ language standard + * (which has a lot more mindshare these days) will be a long time in + * coming and its unknown whether or not it will include stdint.h or + * how much adoption it will have. Either way, it will be a long time + * before all compilers come with a stdint.h and it also does nothing + * for the extremely large number of compilers available today which + * do not include this file, or anything comparable to it. + * + * So that's what this file is all about. Its an attempt to build a + * single universal include file that works on as many platforms as + * possible to deliver what stdint.h is supposed to. A few things + * that should be noted about this file: + * + * 1) It is not guaranteed to be portable and/or present an identical + * interface on all platforms. The extreme variability of the + * ANSI C standard makes this an impossibility right from the + * very get go. Its really only meant to be useful for the vast + * majority of platforms that possess the capability of + * implementing usefully and precisely defined, standard sized + * integer scalars. Systems which are not intrinsically 2s + * complement may produce invalid constants. + * + * 2) There is an unavoidable use of non-reserved symbols. + * + * 3) Other standard include files are invoked. + * + * 4) This file may come in conflict with future platforms that do + * include stdint.h. The hope is that one or the other can be + * used with no real difference. 
+ * + * 5) In the current verison, if your platform can't represent + * int32_t, int16_t and int8_t, it just dumps out with a compiler + * error. + * + * 6) 64 bit integers may or may not be defined. Test for their + * presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX. + * Note that this is different from the C99 specification which + * requires the existence of 64 bit support in the compiler. If + * this is not defined for your platform, yet it is capable of + * dealing with 64 bits then it is because this file has not yet + * been extended to cover all of your system's capabilities. + * + * 7) (u)intptr_t may or may not be defined. Test for its presence + * with the test: #ifdef PTRDIFF_MAX. If this is not defined + * for your platform, then it is because this file has not yet + * been extended to cover all of your system's capabilities, not + * because its optional. + * + * 8) The following might not been defined even if your platform is + * capable of defining it: + * + * WCHAR_MIN + * WCHAR_MAX + * (u)int64_t + * PTRDIFF_MIN + * PTRDIFF_MAX + * (u)intptr_t + * + * 9) The following have not been defined: + * + * WINT_MIN + * WINT_MAX + * + * 10) The criteria for defining (u)int_least(*)_t isn't clear, + * except for systems which don't have a type that precisely + * defined 8, 16, or 32 bit types (which this include file does + * not support anyways). Default definitions have been given. + * + * 11) The criteria for defining (u)int_fast(*)_t isn't something I + * would trust to any particular compiler vendor or the ANSI C + * committee. It is well known that "compatible systems" are + * commonly created that have very different performance + * characteristics from the systems they are compatible with, + * especially those whose vendors make both the compiler and the + * system. 
Default definitions have been given, but its strongly + * recommended that users never use these definitions for any + * reason (they do *NOT* deliver any serious guarantee of + * improved performance -- not in this file, nor any vendor's + * stdint.h). + * + * 12) The following macros: + * + * PRINTF_INTMAX_MODIFIER + * PRINTF_INT64_MODIFIER + * PRINTF_INT32_MODIFIER + * PRINTF_INT16_MODIFIER + * PRINTF_LEAST64_MODIFIER + * PRINTF_LEAST32_MODIFIER + * PRINTF_LEAST16_MODIFIER + * PRINTF_INTPTR_MODIFIER + * + * are strings which have been defined as the modifiers required + * for the "d", "u" and "x" printf formats to correctly output + * (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t, + * (u)least32_t, (u)least16_t and (u)intptr_t types respectively. + * PRINTF_INTPTR_MODIFIER is not defined for some systems which + * provide their own stdint.h. PRINTF_INT64_MODIFIER is not + * defined if INT64_MAX is not defined. These are an extension + * beyond what C99 specifies must be in stdint.h. + * + * In addition, the following macros are defined: + * + * PRINTF_INTMAX_HEX_WIDTH + * PRINTF_INT64_HEX_WIDTH + * PRINTF_INT32_HEX_WIDTH + * PRINTF_INT16_HEX_WIDTH + * PRINTF_INT8_HEX_WIDTH + * PRINTF_INTMAX_DEC_WIDTH + * PRINTF_INT64_DEC_WIDTH + * PRINTF_INT32_DEC_WIDTH + * PRINTF_INT16_DEC_WIDTH + * PRINTF_INT8_DEC_WIDTH + * + * Which specifies the maximum number of characters required to + * print the number of that type in either hexadecimal or decimal. + * These are an extension beyond what C99 specifies must be in + * stdint.h. + * + * Compilers tested (all with 0 warnings at their highest respective + * settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32 + * bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio + * .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3 + * + * This file should be considered a work in progress. Suggestions for + * improvements, especially those which increase coverage are strongly + * encouraged. 
+ * + * Acknowledgements + * + * The following people have made significant contributions to the + * development and testing of this file: + * + * Chris Howie + * John Steele Scott + * Dave Thorup + * + */ + +#include <stddef.h> +#include <limits.h> +#include <signal.h> + +/* + * For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and + * do nothing else. On the Mac OS X version of gcc this is _STDINT_H_. + */ + +#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) )) && !defined (_PSTDINT_H_INCLUDED) +#include <stdint.h> +#define _PSTDINT_H_INCLUDED +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "l" +# endif +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "h" +# endif +# ifndef PRINTF_INTMAX_MODIFIER +# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER +# endif +# ifndef PRINTF_INT64_HEX_WIDTH +# define PRINTF_INT64_HEX_WIDTH "16" +# endif +# ifndef PRINTF_INT32_HEX_WIDTH +# define PRINTF_INT32_HEX_WIDTH "8" +# endif +# ifndef PRINTF_INT16_HEX_WIDTH +# define PRINTF_INT16_HEX_WIDTH "4" +# endif +# ifndef PRINTF_INT8_HEX_WIDTH +# define PRINTF_INT8_HEX_WIDTH "2" +# endif +# ifndef PRINTF_INT64_DEC_WIDTH +# define PRINTF_INT64_DEC_WIDTH "20" +# endif +# ifndef PRINTF_INT32_DEC_WIDTH +# define PRINTF_INT32_DEC_WIDTH "10" +# endif +# ifndef PRINTF_INT16_DEC_WIDTH +# define PRINTF_INT16_DEC_WIDTH "5" +# endif +# ifndef PRINTF_INT8_DEC_WIDTH +# define PRINTF_INT8_DEC_WIDTH "3" +# endif +# ifndef PRINTF_INTMAX_HEX_WIDTH +# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH +# endif +# ifndef PRINTF_INTMAX_DEC_WIDTH +# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH +# endif + +/* + * Something really weird is going on with Open Watcom. 
Just pull some of + * these duplicated definitions from Open Watcom's stdint.h file for now. + */ + +# if defined (__WATCOMC__) && __WATCOMC__ >= 1250 +# if !defined (INT64_C) +# define INT64_C(x) (x + (INT64_MAX - INT64_MAX)) +# endif +# if !defined (UINT64_C) +# define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX)) +# endif +# if !defined (INT32_C) +# define INT32_C(x) (x + (INT32_MAX - INT32_MAX)) +# endif +# if !defined (UINT32_C) +# define UINT32_C(x) (x + (UINT32_MAX - UINT32_MAX)) +# endif +# if !defined (INT16_C) +# define INT16_C(x) (x) +# endif +# if !defined (UINT16_C) +# define UINT16_C(x) (x) +# endif +# if !defined (INT8_C) +# define INT8_C(x) (x) +# endif +# if !defined (UINT8_C) +# define UINT8_C(x) (x) +# endif +# if !defined (UINT64_MAX) +# define UINT64_MAX 18446744073709551615ULL +# endif +# if !defined (INT64_MAX) +# define INT64_MAX 9223372036854775807LL +# endif +# if !defined (UINT32_MAX) +# define UINT32_MAX 4294967295UL +# endif +# if !defined (INT32_MAX) +# define INT32_MAX 2147483647L +# endif +# if !defined (INTMAX_MAX) +# define INTMAX_MAX INT64_MAX +# endif +# if !defined (INTMAX_MIN) +# define INTMAX_MIN INT64_MIN +# endif +# endif +#endif + +#ifndef _PSTDINT_H_INCLUDED +#define _PSTDINT_H_INCLUDED + +#ifndef SIZE_MAX +# define SIZE_MAX (~(size_t)0) +#endif + +/* + * Deduce the type assignments from limits.h under the assumption that + * integer sizes in bits are powers of 2, and follow the ANSI + * definitions. 
+ */ + +#ifndef UINT8_MAX +# define UINT8_MAX 0xff +#endif +#ifndef uint8_t +# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S) + typedef unsigned char uint8_t; +# define UINT8_C(v) ((uint8_t) v) +# else +# error "Platform not supported" +# endif +#endif + +#ifndef INT8_MAX +# define INT8_MAX 0x7f +#endif +#ifndef INT8_MIN +# define INT8_MIN INT8_C(0x80) +#endif +#ifndef int8_t +# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S) + typedef signed char int8_t; +# define INT8_C(v) ((int8_t) v) +# else +# error "Platform not supported" +# endif +#endif + +#ifndef UINT16_MAX +# define UINT16_MAX 0xffff +#endif +#ifndef uint16_t +#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S) + typedef unsigned int uint16_t; +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "" +# endif +# define UINT16_C(v) ((uint16_t) (v)) +#elif (USHRT_MAX == UINT16_MAX) + typedef unsigned short uint16_t; +# define UINT16_C(v) ((uint16_t) (v)) +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "h" +# endif +#else +#error "Platform not supported" +#endif +#endif + +#ifndef INT16_MAX +# define INT16_MAX 0x7fff +#endif +#ifndef INT16_MIN +# define INT16_MIN INT16_C(0x8000) +#endif +#ifndef int16_t +#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S) + typedef signed int int16_t; +# define INT16_C(v) ((int16_t) (v)) +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "" +# endif +#elif (SHRT_MAX == INT16_MAX) + typedef signed short int16_t; +# define INT16_C(v) ((int16_t) (v)) +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "h" +# endif +#else +#error "Platform not supported" +#endif +#endif + +#ifndef UINT32_MAX +# define UINT32_MAX (0xffffffffUL) +#endif +#ifndef uint32_t +#if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S) + typedef unsigned long uint32_t; +# define UINT32_C(v) v ## UL +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "l" +# endif +#elif (UINT_MAX == UINT32_MAX) + typedef unsigned int uint32_t; +# ifndef 
PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +# define UINT32_C(v) v ## U +#elif (USHRT_MAX == UINT32_MAX) + typedef unsigned short uint32_t; +# define UINT32_C(v) ((unsigned short) (v)) +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +#else +#error "Platform not supported" +#endif +#endif + +#ifndef INT32_MAX +# define INT32_MAX (0x7fffffffL) +#endif +#ifndef INT32_MIN +# define INT32_MIN INT32_C(0x80000000) +#endif +#ifndef int32_t +#if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S) + typedef signed long int32_t; +# define INT32_C(v) v ## L +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "l" +# endif +#elif (INT_MAX == INT32_MAX) + typedef signed int int32_t; +# define INT32_C(v) v +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +#elif (SHRT_MAX == INT32_MAX) + typedef signed short int32_t; +# define INT32_C(v) ((short) (v)) +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +#else +#error "Platform not supported" +#endif +#endif + +/* + * The macro stdint_int64_defined is temporarily used to record + * whether or not 64 integer support is available. It must be + * defined for any 64 integer extensions for new platforms that are + * added. 
+ */ + +#undef stdint_int64_defined +#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S) +# if (__STDC__ && __STDC_VERSION >= 199901L) || defined (S_SPLINT_S) +# define stdint_int64_defined + typedef long long int64_t; + typedef unsigned long long uint64_t; +# define UINT64_C(v) v ## ULL +# define INT64_C(v) v ## LL +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# endif +#endif + +#if !defined (stdint_int64_defined) +# if defined(__GNUC__) +# define stdint_int64_defined + __extension__ typedef long long int64_t; + __extension__ typedef unsigned long long uint64_t; +# define UINT64_C(v) v ## ULL +# define INT64_C(v) v ## LL +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S) +# define stdint_int64_defined + typedef long long int64_t; + typedef unsigned long long uint64_t; +# define UINT64_C(v) v ## ULL +# define INT64_C(v) v ## LL +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC) +# define stdint_int64_defined + typedef __int64 int64_t; + typedef unsigned __int64 uint64_t; +# define UINT64_C(v) v ## UI64 +# define INT64_C(v) v ## I64 +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "I64" +# endif +# endif +#endif + +#if !defined (LONG_LONG_MAX) && defined (INT64_C) +# define LONG_LONG_MAX INT64_C (9223372036854775807) +#endif +#ifndef ULONG_LONG_MAX +# define ULONG_LONG_MAX UINT64_C (18446744073709551615) +#endif + +#if !defined (INT64_MAX) && defined (INT64_C) +# define INT64_MAX INT64_C (9223372036854775807) +#endif +#if !defined (INT64_MIN) && defined (INT64_C) +# define 
INT64_MIN INT64_C (-9223372036854775808) +#endif +#if !defined (UINT64_MAX) && defined (INT64_C) +# define UINT64_MAX UINT64_C (18446744073709551615) +#endif + +/* + * Width of hexadecimal for number field. + */ + +#ifndef PRINTF_INT64_HEX_WIDTH +# define PRINTF_INT64_HEX_WIDTH "16" +#endif +#ifndef PRINTF_INT32_HEX_WIDTH +# define PRINTF_INT32_HEX_WIDTH "8" +#endif +#ifndef PRINTF_INT16_HEX_WIDTH +# define PRINTF_INT16_HEX_WIDTH "4" +#endif +#ifndef PRINTF_INT8_HEX_WIDTH +# define PRINTF_INT8_HEX_WIDTH "2" +#endif + +#ifndef PRINTF_INT64_DEC_WIDTH +# define PRINTF_INT64_DEC_WIDTH "20" +#endif +#ifndef PRINTF_INT32_DEC_WIDTH +# define PRINTF_INT32_DEC_WIDTH "10" +#endif +#ifndef PRINTF_INT16_DEC_WIDTH +# define PRINTF_INT16_DEC_WIDTH "5" +#endif +#ifndef PRINTF_INT8_DEC_WIDTH +# define PRINTF_INT8_DEC_WIDTH "3" +#endif + +/* + * Ok, lets not worry about 128 bit integers for now. Moore's law says + * we don't need to worry about that until about 2040 at which point + * we'll have bigger things to worry about. 
+ */ + +#ifdef stdint_int64_defined + typedef int64_t intmax_t; + typedef uint64_t uintmax_t; +# define INTMAX_MAX INT64_MAX +# define INTMAX_MIN INT64_MIN +# define UINTMAX_MAX UINT64_MAX +# define UINTMAX_C(v) UINT64_C(v) +# define INTMAX_C(v) INT64_C(v) +# ifndef PRINTF_INTMAX_MODIFIER +# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER +# endif +# ifndef PRINTF_INTMAX_HEX_WIDTH +# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH +# endif +# ifndef PRINTF_INTMAX_DEC_WIDTH +# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH +# endif +#else + typedef int32_t intmax_t; + typedef uint32_t uintmax_t; +# define INTMAX_MAX INT32_MAX +# define UINTMAX_MAX UINT32_MAX +# define UINTMAX_C(v) UINT32_C(v) +# define INTMAX_C(v) INT32_C(v) +# ifndef PRINTF_INTMAX_MODIFIER +# define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER +# endif +# ifndef PRINTF_INTMAX_HEX_WIDTH +# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH +# endif +# ifndef PRINTF_INTMAX_DEC_WIDTH +# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH +# endif +#endif + +/* + * Because this file currently only supports platforms which have + * precise powers of 2 as bit sizes for the default integers, the + * least definitions are all trivial. Its possible that a future + * version of this file could have different definitions. 
+ */ + +#ifndef stdint_least_defined + typedef int8_t int_least8_t; + typedef uint8_t uint_least8_t; + typedef int16_t int_least16_t; + typedef uint16_t uint_least16_t; + typedef int32_t int_least32_t; + typedef uint32_t uint_least32_t; +# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER +# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER +# define UINT_LEAST8_MAX UINT8_MAX +# define INT_LEAST8_MAX INT8_MAX +# define UINT_LEAST16_MAX UINT16_MAX +# define INT_LEAST16_MAX INT16_MAX +# define UINT_LEAST32_MAX UINT32_MAX +# define INT_LEAST32_MAX INT32_MAX +# define INT_LEAST8_MIN INT8_MIN +# define INT_LEAST16_MIN INT16_MIN +# define INT_LEAST32_MIN INT32_MIN +# ifdef stdint_int64_defined + typedef int64_t int_least64_t; + typedef uint64_t uint_least64_t; +# define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER +# define UINT_LEAST64_MAX UINT64_MAX +# define INT_LEAST64_MAX INT64_MAX +# define INT_LEAST64_MIN INT64_MIN +# endif +#endif +#undef stdint_least_defined + +/* + * The ANSI C committee pretending to know or specify anything about + * performance is the epitome of misguided arrogance. The mandate of + * this file is to *ONLY* ever support that absolute minimum + * definition of the fast integer types, for compatibility purposes. + * No extensions, and no attempt to suggest what may or may not be a + * faster integer type will ever be made in this file. Developers are + * warned to stay away from these types when using this or any other + * stdint.h. 
+ */ + +typedef int_least8_t int_fast8_t; +typedef uint_least8_t uint_fast8_t; +typedef int_least16_t int_fast16_t; +typedef uint_least16_t uint_fast16_t; +typedef int_least32_t int_fast32_t; +typedef uint_least32_t uint_fast32_t; +#define UINT_FAST8_MAX UINT_LEAST8_MAX +#define INT_FAST8_MAX INT_LEAST8_MAX +#define UINT_FAST16_MAX UINT_LEAST16_MAX +#define INT_FAST16_MAX INT_LEAST16_MAX +#define UINT_FAST32_MAX UINT_LEAST32_MAX +#define INT_FAST32_MAX INT_LEAST32_MAX +#define INT_FAST8_MIN INT_LEAST8_MIN +#define INT_FAST16_MIN INT_LEAST16_MIN +#define INT_FAST32_MIN INT_LEAST32_MIN +#ifdef stdint_int64_defined + typedef int_least64_t int_fast64_t; + typedef uint_least64_t uint_fast64_t; +# define UINT_FAST64_MAX UINT_LEAST64_MAX +# define INT_FAST64_MAX INT_LEAST64_MAX +# define INT_FAST64_MIN INT_LEAST64_MIN +#endif + +#undef stdint_int64_defined + +/* + * Whatever piecemeal, per compiler thing we can do about the wchar_t + * type limits. + */ + +#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__) +# include <wchar.h> +# ifndef WCHAR_MIN +# define WCHAR_MIN 0 +# endif +# ifndef WCHAR_MAX +# define WCHAR_MAX ((wchar_t)-1) +# endif +#endif + +/* + * Whatever piecemeal, per compiler/platform thing we can do about the + * (u)intptr_t types and limits. + */ + +#if defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED) +# define STDINT_H_UINTPTR_T_DEFINED +#endif + +#ifndef STDINT_H_UINTPTR_T_DEFINED +# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64) +# define stdint_intptr_bits 64 +# elif defined (__WATCOMC__) || defined (__TURBOC__) +# if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__) +# define stdint_intptr_bits 16 +# else +# define stdint_intptr_bits 32 +# endif +# elif defined (__i386__) || defined (_WIN32) || defined (WIN32) +# define stdint_intptr_bits 32 +# elif defined (__INTEL_COMPILER) +/* TODO -- what will Intel do about x86-64? 
*/ +# endif + +# ifdef stdint_intptr_bits +# define stdint_intptr_glue3_i(a,b,c) a##b##c +# define stdint_intptr_glue3(a,b,c) stdint_intptr_glue3_i(a,b,c) +# ifndef PRINTF_INTPTR_MODIFIER +# define PRINTF_INTPTR_MODIFIER stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER) +# endif +# ifndef PTRDIFF_MAX +# define PTRDIFF_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX) +# endif +# ifndef PTRDIFF_MIN +# define PTRDIFF_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN) +# endif +# ifndef UINTPTR_MAX +# define UINTPTR_MAX stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX) +# endif +# ifndef INTPTR_MAX +# define INTPTR_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX) +# endif +# ifndef INTPTR_MIN +# define INTPTR_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN) +# endif +# ifndef INTPTR_C +# define INTPTR_C(x) stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x) +# endif +# ifndef UINTPTR_C +# define UINTPTR_C(x) stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x) +# endif + typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t; + typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t) intptr_t; +# else +/* TODO -- This following is likely wrong for some platforms, and does + nothing for the definition of uintptr_t. */ + typedef ptrdiff_t intptr_t; +# endif +# define STDINT_H_UINTPTR_T_DEFINED +#endif + +/* + * Assumes sig_atomic_t is signed and we have a 2s complement machine. + */ + +#ifndef SIG_ATOMIC_MAX +# define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1) +#endif + +#endif + +#if defined (__TEST_PSTDINT_FOR_CORRECTNESS) + +/* + * Please compile with the maximum warning settings to make sure macros are not + * defined more than once. 
+ */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#define glue3_aux(x,y,z) x ## y ## z +#define glue3(x,y,z) glue3_aux(x,y,z) + +#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,=) glue3(UINT,bits,_C) (0); +#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,=) glue3(INT,bits,_C) (0); + +#define DECL(us,bits) glue3(DECL,us,) (bits) + +#define TESTUMAX(bits) glue3(u,bits,=) glue3(~,u,bits); if (glue3(UINT,bits,_MAX) glue3(!=,u,bits)) printf ("Something wrong with UINT%d_MAX\n", bits) + +int main () { + DECL(I,8) + DECL(U,8) + DECL(I,16) + DECL(U,16) + DECL(I,32) + DECL(U,32) +#ifdef INT64_MAX + DECL(I,64) + DECL(U,64) +#endif + intmax_t imax = INTMAX_C(0); + uintmax_t umax = UINTMAX_C(0); + char str0[256], str1[256]; + + sprintf (str0, "%d %x\n", 0, ~0); + + sprintf (str1, "%d %x\n", i8, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1); + sprintf (str1, "%u %x\n", u8, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1); + sprintf (str1, "%d %x\n", i16, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1); + sprintf (str1, "%u %x\n", u16, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1); + sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n", i32, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1); + sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n", u32, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1); +#ifdef INT64_MAX + sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n", i64, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1); +#endif + sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n", imax, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1); + sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n", umax, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong 
with umax : %s\n", str1); + + TESTUMAX(8); + TESTUMAX(16); + TESTUMAX(32); +#ifdef INT64_MAX + TESTUMAX(64); +#endif + + return EXIT_SUCCESS; +} + +#endif @@ -1,325 +1,325 @@ -/*
-SHA-1 in C
-By Steve Reid <sreid@sea-to-sky.net>
-100% Public Domain
-
------------------
-Modified 7/98
-By James H. Brown <jbrown@burgoyne.com>
-Still 100% Public Domain
-
-Corrected a problem which generated improper hash values on 16 bit machines
-Routine SHA1Update changed from
- void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned int
-len)
-to
- void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned
-long len)
-
-The 'len' parameter was declared an int which works fine on 32 bit machines.
-However, on 16 bit machines an int is too small for the shifts being done
-against
-it. This caused the hash function to generate incorrect values if len was
-greater than 8191 (8K - 1) due to the 'len << 3' on line 3 of SHA1Update().
-
-Since the file IO in main() reads 16K at a time, any file 8K or larger would
-be guaranteed to generate the wrong hash (e.g. Test Vector #3, a million
-"a"s).
-
-I also changed the declaration of variables i & j in SHA1Update to
-unsigned long from unsigned int for the same reason.
-
-These changes should make no difference to any 32 bit implementations since
-an
-int and a long are the same size in those environments.
-
---
-I also corrected a few compiler warnings generated by Borland C.
-1. Added #include <process.h> for exit() prototype
-2. Removed unused variable 'j' in SHA1Final
-3. Changed exit(0) to return(0) at end of main.
-
-ALL changes I made can be located by searching for comments containing 'JHB'
------------------
-Modified 8/98
-By Steve Reid <sreid@sea-to-sky.net>
-Still 100% public domain
-
-1- Removed #include <process.h> and used return() instead of exit()
-2- Fixed overwriting of finalcount in SHA1Final() (discovered by Chris Hall)
-3- Changed email address from steve@edmweb.com to sreid@sea-to-sky.net
-
------------------
-Modified 4/01
-By Saul Kravitz <Saul.Kravitz@celera.com>
-Still 100% PD
-Modified to run on Compaq Alpha hardware.
-
------------------
-Modified 07/2002
-By Ralph Giles <giles@ghostscript.com>
-Still 100% public domain
-modified for use with stdint types, autoconf
-code cleanup, removed attribution comments
-switched SHA1Final() argument order for consistency
-use SHA1_ prefix for public api
-move public api to sha1.h
-*/
-
-/*
-Test Vectors (from FIPS PUB 180-1)
-"abc"
- A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D
-"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
- 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1
-A million repetitions of "a"
- 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F
-*/
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-#include "sha1.h"
-
-#if defined(_MSC_VER)
-#pragma warning(disable : 4267)
-#pragma warning(disable : 4996)
-#pragma warning(disable : 4100)
-#endif
-
void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]);

/* 32-bit rotate-left.  Use the project's ROTL32 when a platform header has
   already defined it as a macro; otherwise fall back to the portable
   shift/or form so this file does not depend on include order. */
#ifdef ROTL32
#define rol ROTL32
#else
#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
#endif

/* blk0() and blk() perform the initial expand. */
/* I got the idea of expanding during the round function from SSLeay */
/* The sixteen message words are decoded big-endian into a private workspace
   at the top of SHA1_Transform(), so blk0() is a plain read on every host and
   no WORDS_BIGENDIAN switch is required. */

#define blk0(i) block->l[i]
#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] ^ block->l[(i+2)&15]^block->l[i&15],1))

/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);
#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);
#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);
#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);


/* Hash a single 512-bit block. This is the core of the algorithm.
 *
 *   state  - the five 32-bit chaining words; updated in place.
 *   buffer - the 64 message bytes of this block; read only, never written,
 *            and accepted at any alignment.
 *
 * The message bytes are first unpacked into a local 16-word schedule.  The
 * round macros rewrite that schedule as they go (blk() stores back into it),
 * which is why the rounds must not operate on the caller's storage.
 */
void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64])
{
    uint32_t a, b, c, d, e;
    typedef struct {
        uint32_t l[16];
    } SHA1_WORKSPACE;
    SHA1_WORKSPACE workspace;
    SHA1_WORKSPACE* block = &workspace;   /* the round macros refer to `block` */
    int t;

    /* Decode the block as sixteen big-endian 32-bit words, byte by byte, so
       the result is the same on little- and big-endian hosts. */
    for (t = 0; t < 16; t++) {
        const uint8_t* p = buffer + 4 * t;
        workspace.l[t] = ((uint32_t)p[0] << 24)
                       | ((uint32_t)p[1] << 16)
                       | ((uint32_t)p[2] << 8)
                       |  (uint32_t)p[3];
    }

    /* Copy context->state[] to working vars */
    a = state[0];
    b = state[1];
    c = state[2];
    d = state[3];
    e = state[4];

    /* 4 rounds of 20 operations each. Loop unrolled. */
    R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
    R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
    R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
    R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
    R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
    R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
    R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
    R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
    R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
    R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
    R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
    R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
    R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
    R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
    R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
    R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
    R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
    R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
    R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
    R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);

    /* Add the working vars back into context.state[] */
    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
    state[4] += e;

    /* Wipe variables (best effort: an optimizer may drop stores to locals
       that are never read again) */
    a = b = c = d = e = 0;
    memset(&workspace, 0, sizeof(workspace));
}
-
-
-/* SHA1Init - Initialize new context */
-void SHA1_Init(SHA1_CTX* context)
-{
- /* SHA1 initialization constants */
- context->state[0] = 0x67452301;
- context->state[1] = 0xEFCDAB89;
- context->state[2] = 0x98BADCFE;
- context->state[3] = 0x10325476;
- context->state[4] = 0xC3D2E1F0;
- context->count[0] = 0;
- context->count[1] = 0;
-}
-
-
-/* Run your data through this. */
-void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len)
-{
- size_t i, j;
-
- j = (context->count[0] >> 3) & 63;
- if ((context->count[0] += len << 3) < (len << 3)) context->count[1]++;
-
- context->count[1] += (len >> 29);
-
- if ((j + len) > 63)
- {
- memcpy(&context->buffer[j], data, (i = 64-j));
- SHA1_Transform(context->state, context->buffer);
-
- for ( ; i + 63 < len; i += 64)
- {
- SHA1_Transform(context->state, data + i);
- }
-
- j = 0;
- }
- else i = 0;
- memcpy(&context->buffer[j], &data[i], len - i);
-}
-
-
-/* Add padding and return the message digest. */
-void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE])
-{
- uint32_t i;
- uint8_t finalcount[8];
-
- for (i = 0; i < 8; i++) {
- finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)]
- >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */
- }
- SHA1_Update(context, (uint8_t *)"\200", 1);
- while ((context->count[0] & 504) != 448) {
- SHA1_Update(context, (uint8_t *)"\0", 1);
- }
- SHA1_Update(context, finalcount, 8); /* Should cause a SHA1_Transform() */
- for (i = 0; i < SHA1_DIGEST_SIZE; i++) {
- digest[i] = (uint8_t)
- ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
- }
-
- /* Wipe variables */
- i = 0;
- memset(context->buffer, 0, 64);
- memset(context->state, 0, 20);
- memset(context->count, 0, 8);
- memset(finalcount, 0, 8); /* SWR */
-}
-
-//-----------------------------------------------------------------------------
-
-void sha1_32a ( const void * key, int len, uint32_t seed, void * out )
-{
- SHA1_CTX context;
-
- uint8_t digest[20];
-
- SHA1_Init(&context);
- SHA1_Update(&context, (uint8_t*)key, len);
- SHA1_Final(&context, digest);
-
- memcpy(out,&digest[0],4);
-}
-
-//-----------------------------------------------------------------------------
-// self test
-
-//#define TEST
-
-#ifdef TEST
-
/* Self-test inputs.  The third entry is only a label for the failure
   message: that vector is produced by feeding 'a' one million times. */
static const char *test_data[] = {
    "abc",
    "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
    "A million repetitions of 'a'"};
/* Expected digests, formatted the way digest_to_hex() prints them.
   Pointers are const because string literals must not be modified (and do
   not convert to plain char* in C++11 and later). */
static const char *test_results[] = {
    "A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D",
    "84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1",
    "34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F"};
-
-
-void digest_to_hex(const uint8_t digest[SHA1_DIGEST_SIZE], char *output)
-{
- int i,j;
- char *c = output;
-
- for (i = 0; i < SHA1_DIGEST_SIZE/4; i++) {
- for (j = 0; j < 4; j++) {
- sprintf(c,"%02X", digest[i*4+j]);
- c += 2;
- }
- sprintf(c, " ");
- c += 1;
- }
- *(c - 1) = '\0';
-}
-
-int main(int argc, char** argv)
-{
- int k;
- SHA1_CTX context;
- uint8_t digest[20];
- char output[80];
-
- fprintf(stdout, "verifying SHA-1 implementation... ");
-
- for (k = 0; k < 2; k++){
- SHA1_Init(&context);
- SHA1_Update(&context, (uint8_t*)test_data[k], strlen(test_data[k]));
- SHA1_Final(&context, digest);
- digest_to_hex(digest, output);
-
- if (strcmp(output, test_results[k])) {
- fprintf(stdout, "FAIL\n");
- fprintf(stderr,"* hash of \"%s\" incorrect:\n", test_data[k]);
- fprintf(stderr,"\t%s returned\n", output);
- fprintf(stderr,"\t%s is correct\n", test_results[k]);
- return (1);
- }
- }
- /* million 'a' vector we feed separately */
- SHA1_Init(&context);
- for (k = 0; k < 1000000; k++)
- SHA1_Update(&context, (uint8_t*)"a", 1);
- SHA1_Final(&context, digest);
- digest_to_hex(digest, output);
- if (strcmp(output, test_results[2])) {
- fprintf(stdout, "FAIL\n");
- fprintf(stderr,"* hash of \"%s\" incorrect:\n", test_data[2]);
- fprintf(stderr,"\t%s returned\n", output);
- fprintf(stderr,"\t%s is correct\n", test_results[2]);
- return (1);
- }
-
- /* success */
- fprintf(stdout, "ok\n");
- return(0);
-}
-#endif /* TEST */
+/* +SHA-1 in C +By Steve Reid <sreid@sea-to-sky.net> +100% Public Domain + +----------------- +Modified 7/98 +By James H. Brown <jbrown@burgoyne.com> +Still 100% Public Domain + +Corrected a problem which generated improper hash values on 16 bit machines +Routine SHA1Update changed from + void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned int +len) +to + void SHA1Update(SHA1_CTX* context, unsigned char* data, unsigned +long len) + +The 'len' parameter was declared an int which works fine on 32 bit machines. +However, on 16 bit machines an int is too small for the shifts being done +against +it. This caused the hash function to generate incorrect values if len was +greater than 8191 (8K - 1) due to the 'len << 3' on line 3 of SHA1Update(). + +Since the file IO in main() reads 16K at a time, any file 8K or larger would +be guaranteed to generate the wrong hash (e.g. Test Vector #3, a million +"a"s). + +I also changed the declaration of variables i & j in SHA1Update to +unsigned long from unsigned int for the same reason. + +These changes should make no difference to any 32 bit implementations since +an +int and a long are the same size in those environments. + +-- +I also corrected a few compiler warnings generated by Borland C. +1. Added #include <process.h> for exit() prototype +2. Removed unused variable 'j' in SHA1Final +3. Changed exit(0) to return(0) at end of main. + +ALL changes I made can be located by searching for comments containing 'JHB' +----------------- +Modified 8/98 +By Steve Reid <sreid@sea-to-sky.net> +Still 100% public domain + +1- Removed #include <process.h> and used return() instead of exit() +2- Fixed overwriting of finalcount in SHA1Final() (discovered by Chris Hall) +3- Changed email address from steve@edmweb.com to sreid@sea-to-sky.net + +----------------- +Modified 4/01 +By Saul Kravitz <Saul.Kravitz@celera.com> +Still 100% PD +Modified to run on Compaq Alpha hardware. 
+ +----------------- +Modified 07/2002 +By Ralph Giles <giles@ghostscript.com> +Still 100% public domain +modified for use with stdint types, autoconf +code cleanup, removed attribution comments +switched SHA1Final() argument order for consistency +use SHA1_ prefix for public api +move public api to sha1.h +*/ + +/* +Test Vectors (from FIPS PUB 180-1) +"abc" + A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D +"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" + 84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1 +A million repetitions of "a" + 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F +*/ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +#include "sha1.h" + +#if defined(_MSC_VER) +#pragma warning(disable : 4267) +#pragma warning(disable : 4996) +#pragma warning(disable : 4100) +#endif + +void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]); + +#define rol ROTL32 + +/* blk0() and blk() perform the initial expand. */ +/* I got the idea of expanding during the round function from SSLeay */ +/* FIXME: can we do this in an endian-proof way? */ + +#ifdef WORDS_BIGENDIAN +#define blk0(i) block->l[i] +#else +#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) | (rol(block->l[i],8)&0x00FF00FF)) +#endif +#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] ^ block->l[(i+2)&15]^block->l[i&15],1)) + +/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */ +#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30); +#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30); +#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30); +#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30); +#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30); + + +/* Hash a single 512-bit block. This is the core of the algorithm. 
*/ +void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]) +{ + uint32_t a, b, c, d, e; + typedef union { + uint8_t c[64]; + uint32_t l[16]; + } CHAR64LONG16; + CHAR64LONG16* block; + + block = (CHAR64LONG16*)buffer; + + /* Copy context->state[] to working vars */ + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + + /* 4 rounds of 20 operations each. Loop unrolled. */ + R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3); + R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7); + R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11); + R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15); + R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19); + R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23); + R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27); + R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31); + R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35); + R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39); + R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43); + R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47); + R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51); + R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55); + R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59); + R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63); + R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67); + R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); + R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75); + R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); + + /* Add the working vars back into context.state[] */ + state[0] += a; + state[1] += b; + state[2] += c; 
+ state[3] += d; + state[4] += e; + + /* Wipe variables */ + a = b = c = d = e = 0; +} + + +/* SHA1Init - Initialize new context */ +void SHA1_Init(SHA1_CTX* context) +{ + /* SHA1 initialization constants */ + context->state[0] = 0x67452301; + context->state[1] = 0xEFCDAB89; + context->state[2] = 0x98BADCFE; + context->state[3] = 0x10325476; + context->state[4] = 0xC3D2E1F0; + context->count[0] = 0; + context->count[1] = 0; +} + + +/* Run your data through this. */ +void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len) +{ + size_t i, j; + + j = (context->count[0] >> 3) & 63; + if ((context->count[0] += len << 3) < (len << 3)) context->count[1]++; + + context->count[1] += (len >> 29); + + if ((j + len) > 63) + { + memcpy(&context->buffer[j], data, (i = 64-j)); + SHA1_Transform(context->state, context->buffer); + + for ( ; i + 63 < len; i += 64) + { + SHA1_Transform(context->state, data + i); + } + + j = 0; + } + else i = 0; + memcpy(&context->buffer[j], &data[i], len - i); +} + + +/* Add padding and return the message digest. */ +void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE]) +{ + uint32_t i; + uint8_t finalcount[8]; + + for (i = 0; i < 8; i++) { + finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 
0 : 1)] + >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */ + } + SHA1_Update(context, (uint8_t *)"\200", 1); + while ((context->count[0] & 504) != 448) { + SHA1_Update(context, (uint8_t *)"\0", 1); + } + SHA1_Update(context, finalcount, 8); /* Should cause a SHA1_Transform() */ + for (i = 0; i < SHA1_DIGEST_SIZE; i++) { + digest[i] = (uint8_t) + ((context->state[i>>2] >> ((3-(i & 3)) * 8) ) & 255); + } + + /* Wipe variables */ + i = 0; + memset(context->buffer, 0, 64); + memset(context->state, 0, 20); + memset(context->count, 0, 8); + memset(finalcount, 0, 8); /* SWR */ +} + +//----------------------------------------------------------------------------- + +void sha1_32a ( const void * key, int len, uint32_t seed, void * out ) +{ + SHA1_CTX context; + + uint8_t digest[20]; + + SHA1_Init(&context); + SHA1_Update(&context, (uint8_t*)key, len); + SHA1_Final(&context, digest); + + memcpy(out,&digest[0],4); +} + +//----------------------------------------------------------------------------- +// self test + +//#define TEST + +#ifdef TEST + +static char *test_data[] = { + "abc", + "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", + "A million repetitions of 'a'"}; +static char *test_results[] = { + "A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D", + "84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1", + "34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F"}; + + +void digest_to_hex(const uint8_t digest[SHA1_DIGEST_SIZE], char *output) +{ + int i,j; + char *c = output; + + for (i = 0; i < SHA1_DIGEST_SIZE/4; i++) { + for (j = 0; j < 4; j++) { + sprintf(c,"%02X", digest[i*4+j]); + c += 2; + } + sprintf(c, " "); + c += 1; + } + *(c - 1) = '\0'; +} + +int main(int argc, char** argv) +{ + int k; + SHA1_CTX context; + uint8_t digest[20]; + char output[80]; + + fprintf(stdout, "verifying SHA-1 implementation... 
"); + + for (k = 0; k < 2; k++){ + SHA1_Init(&context); + SHA1_Update(&context, (uint8_t*)test_data[k], strlen(test_data[k])); + SHA1_Final(&context, digest); + digest_to_hex(digest, output); + + if (strcmp(output, test_results[k])) { + fprintf(stdout, "FAIL\n"); + fprintf(stderr,"* hash of \"%s\" incorrect:\n", test_data[k]); + fprintf(stderr,"\t%s returned\n", output); + fprintf(stderr,"\t%s is correct\n", test_results[k]); + return (1); + } + } + /* million 'a' vector we feed separately */ + SHA1_Init(&context); + for (k = 0; k < 1000000; k++) + SHA1_Update(&context, (uint8_t*)"a", 1); + SHA1_Final(&context, digest); + digest_to_hex(digest, output); + if (strcmp(output, test_results[2])) { + fprintf(stdout, "FAIL\n"); + fprintf(stderr,"* hash of \"%s\" incorrect:\n", test_data[2]); + fprintf(stderr,"\t%s returned\n", output); + fprintf(stderr,"\t%s is correct\n", test_results[2]); + return (1); + } + + /* success */ + fprintf(stdout, "ok\n"); + return(0); +} +#endif /* TEST */ @@ -1,21 +1,21 @@ -/* public api for steve reid's public domain SHA-1 implementation */
-/* this file is in the public domain */
-
-#pragma once
-
-#include "Platform.h"
-
-struct SHA1_CTX
-{
- uint32_t state[5];
- uint32_t count[2];
- uint8_t buffer[64];
-};
-
-#define SHA1_DIGEST_SIZE 20
-
-void SHA1_Init(SHA1_CTX* context);
-void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len);
-void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE]);
-
+/* public api for steve reid's public domain SHA-1 implementation */ +/* this file is in the public domain */ + +#pragma once + +#include "Platform.h" + +struct SHA1_CTX +{ + uint32_t state[5]; + uint32_t count[2]; + uint8_t buffer[64]; +}; + +#define SHA1_DIGEST_SIZE 20 + +void SHA1_Init(SHA1_CTX* context); +void SHA1_Update(SHA1_CTX* context, const uint8_t* data, const size_t len); +void SHA1_Final(SHA1_CTX* context, uint8_t digest[SHA1_DIGEST_SIZE]); + void sha1_32a ( const void * key, int len, uint32_t seed, void * out );
\ No newline at end of file |