aboutsummaryrefslogtreecommitdiff
path: root/bench/benchBlasGemm.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'bench/benchBlasGemm.cpp')
-rw-r--r--bench/benchBlasGemm.cpp219
1 files changed, 219 insertions, 0 deletions
diff --git a/bench/benchBlasGemm.cpp b/bench/benchBlasGemm.cpp
new file mode 100644
index 000000000..cb086a555
--- /dev/null
+++ b/bench/benchBlasGemm.cpp
@@ -0,0 +1,219 @@
+// g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
+// possible options:
+// -DEIGEN_DONT_VECTORIZE
+// -msse2
+
+// #define EIGEN_DEFAULT_TO_ROW_MAJOR
+#define _FLOAT
+
+#include <iostream>
+
+#include <Eigen/Core>
+#include "BenchTimer.h"
+
+// include the BLAS headers
+extern "C" {
+#include <cblas.h>
+}
+#include <string>
+
+#ifdef _FLOAT
+typedef float Scalar;
+#define CBLAS_GEMM cblas_sgemm
+#else
+typedef double Scalar;
+#define CBLAS_GEMM cblas_dgemm
+#endif
+
+
+typedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix;
+void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
+void check_product(int M, int N, int K);
+void check_product(void);
+
+int main(int argc, char *argv[])
+{
+ // disable SSE exceptions
+ #ifdef __GNUC__
+ {
+ int aux;
+ asm(
+ "stmxcsr %[aux] \n\t"
+ "orl $32832, %[aux] \n\t"
+ "ldmxcsr %[aux] \n\t"
+ : : [aux] "m" (aux));
+ }
+ #endif
+
+ int nbtries=1, nbloops=1, M, N, K;
+
+ if (argc==2)
+ {
+ if (std::string(argv[1])=="check")
+ check_product();
+ else
+ M = N = K = atoi(argv[1]);
+ }
+ else if ((argc==3) && (std::string(argv[1])=="auto"))
+ {
+ M = N = K = atoi(argv[2]);
+ nbloops = 1000000000/(M*M*M);
+ if (nbloops<1)
+ nbloops = 1;
+ nbtries = 6;
+ }
+ else if (argc==4)
+ {
+ M = N = K = atoi(argv[1]);
+ nbloops = atoi(argv[2]);
+ nbtries = atoi(argv[3]);
+ }
+ else if (argc==6)
+ {
+ M = atoi(argv[1]);
+ N = atoi(argv[2]);
+ K = atoi(argv[3]);
+ nbloops = atoi(argv[4]);
+ nbtries = atoi(argv[5]);
+ }
+ else
+ {
+ std::cout << "Usage: " << argv[0] << " size \n";
+ std::cout << "Usage: " << argv[0] << " auto size\n";
+ std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
+ std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
+ std::cout << "Usage: " << argv[0] << " check\n";
+ std::cout << "Options:\n";
+ std::cout << " size unique size of the 2 matrices (integer)\n";
+ std::cout << " auto automatically set the number of repetitions and tries\n";
+ std::cout << " nbloops number of times the GEMM routines is executed\n";
+ std::cout << " nbtries number of times the loop is benched (return the best try)\n";
+ std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n";
+ std::cout << " check check eigen product using cblas as a reference\n";
+ exit(1);
+ }
+
+ double nbmad = double(M) * double(N) * double(K) * double(nbloops);
+
+ if (!(std::string(argv[1])=="auto"))
+ std::cout << M << " x " << N << " x " << K << "\n";
+
+ Scalar alpha, beta;
+ MyMatrix ma(M,K), mb(K,N), mc(M,N);
+ ma = MyMatrix::Random(M,K);
+ mb = MyMatrix::Random(K,N);
+ mc = MyMatrix::Random(M,N);
+
+ Eigen::BenchTimer timer;
+
+ // we simply compute c += a*b, so:
+ alpha = 1;
+ beta = 1;
+
+ // bench cblas
+ // ROWS_A, COLS_B, COLS_A, 1.0, A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
+ if (!(std::string(argv[1])=="auto"))
+ {
+ timer.reset();
+ for (uint k=0 ; k<nbtries ; ++k)
+ {
+ timer.start();
+ for (uint j=0 ; j<nbloops ; ++j)
+ #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
+ CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
+ #else
+ CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
+ #endif
+ timer.stop();
+ }
+ if (!(std::string(argv[1])=="auto"))
+ std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
+ else
+ std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
+ }
+
+ // clear
+ ma = MyMatrix::Random(M,K);
+ mb = MyMatrix::Random(K,N);
+ mc = MyMatrix::Random(M,N);
+
+ // eigen
+// if (!(std::string(argv[1])=="auto"))
+ {
+ timer.reset();
+ for (uint k=0 ; k<nbtries ; ++k)
+ {
+ timer.start();
+ bench_eigengemm(mc, ma, mb, nbloops);
+ timer.stop();
+ }
+ if (!(std::string(argv[1])=="auto"))
+ std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
+ else
+ std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
+ }
+
+ std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
+ std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
+
+
+ return 0;
+}
+
+using namespace Eigen;
+
+void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
+{
+ for (uint j=0 ; j<nbloops ; ++j)
+ mc.noalias() += ma * mb;
+}
+
+#define MYVERIFY(A,M) if (!(A)) { \
+ std::cout << "FAIL: " << M << "\n"; \
+ }
+void check_product(int M, int N, int K)
+{
+ MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
+ ma = MyMatrix::Random(M,K);
+ mb = MyMatrix::Random(K,N);
+ maT = ma.transpose();
+ mbT = mb.transpose();
+ mc = MyMatrix::Random(M,N);
+
+ MyMatrix::Scalar eps = 1e-4;
+
+ meigen = mref = mc;
+ CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
+ meigen += ma * mb;
+ MYVERIFY(meigen.isApprox(mref, eps),". * .");
+
+ meigen = mref = mc;
+ CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
+ meigen += maT.transpose() * mb;
+ MYVERIFY(meigen.isApprox(mref, eps),"T * .");
+
+ meigen = mref = mc;
+ CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
+ meigen += (maT.transpose()) * (mbT.transpose());
+ MYVERIFY(meigen.isApprox(mref, eps),"T * T");
+
+ meigen = mref = mc;
+ CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
+ meigen += ma * mbT.transpose();
+ MYVERIFY(meigen.isApprox(mref, eps),". * T");
+}
+
+void check_product(void)
+{
+ int M, N, K;
+ for (uint i=0; i<1000; ++i)
+ {
+ M = internal::random<int>(1,64);
+ N = internal::random<int>(1,768);
+ K = internal::random<int>(1,768);
+ M = (0 + M) * 1;
+ std::cout << M << " x " << N << " x " << K << "\n";
+ check_product(M, N, K);
+ }
+}
+