aboutsummaryrefslogtreecommitdiff
path: root/bench/benchVecAdd.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'bench/benchVecAdd.cpp')
-rw-r--r--bench/benchVecAdd.cpp135
1 files changed, 135 insertions, 0 deletions
diff --git a/bench/benchVecAdd.cpp b/bench/benchVecAdd.cpp
new file mode 100644
index 000000000..ce8e1e911
--- /dev/null
+++ b/bench/benchVecAdd.cpp
@@ -0,0 +1,135 @@
+
+#include <iostream>
+#include <Eigen/Core>
+#include <bench/BenchTimer.h>
+using namespace Eigen;
+
+#ifndef SIZE
+#define SIZE 50
+#endif
+
+#ifndef REPEAT
+#define REPEAT 10000
+#endif
+
+typedef float Scalar;
+
+__attribute__ ((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size);
+__attribute__ ((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c);
+__attribute__ ((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c);
+
+int main(int argc, char* argv[])
+{
+ int size = SIZE * 8;
+ int size2 = size * size;
+ Scalar* a = internal::aligned_new<Scalar>(size2);
+ Scalar* b = internal::aligned_new<Scalar>(size2+4)+1;
+ Scalar* c = internal::aligned_new<Scalar>(size2);
+
+ for (int i=0; i<size; ++i)
+ {
+ a[i] = b[i] = c[i] = 0;
+ }
+
+ BenchTimer timer;
+
+ timer.reset();
+ for (int k=0; k<10; ++k)
+ {
+ timer.start();
+ benchVec(a, b, c, size2);
+ timer.stop();
+ }
+ std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
+ return 0;
+ for (int innersize = size; innersize>2 ; --innersize)
+ {
+ if (size2%innersize==0)
+ {
+ int outersize = size2/innersize;
+ MatrixXf ma = Map<MatrixXf>(a, innersize, outersize );
+ MatrixXf mb = Map<MatrixXf>(b, innersize, outersize );
+ MatrixXf mc = Map<MatrixXf>(c, innersize, outersize );
+ timer.reset();
+ for (int k=0; k<3; ++k)
+ {
+ timer.start();
+ benchVec(ma, mb, mc);
+ timer.stop();
+ }
+ std::cout << innersize << " x " << outersize << " " << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
+ }
+ }
+
+ VectorXf va = Map<VectorXf>(a, size2);
+ VectorXf vb = Map<VectorXf>(b, size2);
+ VectorXf vc = Map<VectorXf>(c, size2);
+ timer.reset();
+ for (int k=0; k<3; ++k)
+ {
+ timer.start();
+ benchVec(va, vb, vc);
+ timer.stop();
+ }
+ std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
+
+ return 0;
+}
+
+void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c)
+{
+ for (int k=0; k<REPEAT; ++k)
+ a = a + b;
+}
+
+void benchVec(VectorXf& a, VectorXf& b, VectorXf& c)
+{
+ for (int k=0; k<REPEAT; ++k)
+ a = a + b;
+}
+
+void benchVec(Scalar* a, Scalar* b, Scalar* c, int size)
+{
+ typedef internal::packet_traits<Scalar>::type PacketScalar;
+ const int PacketSize = internal::packet_traits<Scalar>::size;
+ PacketScalar a0, a1, a2, a3, b0, b1, b2, b3;
+ for (int k=0; k<REPEAT; ++k)
+ for (int i=0; i<size; i+=PacketSize*8)
+ {
+// a0 = internal::pload(&a[i]);
+// b0 = internal::pload(&b[i]);
+// a1 = internal::pload(&a[i+1*PacketSize]);
+// b1 = internal::pload(&b[i+1*PacketSize]);
+// a2 = internal::pload(&a[i+2*PacketSize]);
+// b2 = internal::pload(&b[i+2*PacketSize]);
+// a3 = internal::pload(&a[i+3*PacketSize]);
+// b3 = internal::pload(&b[i+3*PacketSize]);
+// internal::pstore(&a[i], internal::padd(a0, b0));
+// a0 = internal::pload(&a[i+4*PacketSize]);
+// b0 = internal::pload(&b[i+4*PacketSize]);
+//
+// internal::pstore(&a[i+1*PacketSize], internal::padd(a1, b1));
+// a1 = internal::pload(&a[i+5*PacketSize]);
+// b1 = internal::pload(&b[i+5*PacketSize]);
+//
+// internal::pstore(&a[i+2*PacketSize], internal::padd(a2, b2));
+// a2 = internal::pload(&a[i+6*PacketSize]);
+// b2 = internal::pload(&b[i+6*PacketSize]);
+//
+// internal::pstore(&a[i+3*PacketSize], internal::padd(a3, b3));
+// a3 = internal::pload(&a[i+7*PacketSize]);
+// b3 = internal::pload(&b[i+7*PacketSize]);
+//
+// internal::pstore(&a[i+4*PacketSize], internal::padd(a0, b0));
+// internal::pstore(&a[i+5*PacketSize], internal::padd(a1, b1));
+// internal::pstore(&a[i+6*PacketSize], internal::padd(a2, b2));
+// internal::pstore(&a[i+7*PacketSize], internal::padd(a3, b3));
+
+ internal::pstore(&a[i+2*PacketSize], internal::padd(internal::ploadu(&a[i+2*PacketSize]), internal::ploadu(&b[i+2*PacketSize])));
+ internal::pstore(&a[i+3*PacketSize], internal::padd(internal::ploadu(&a[i+3*PacketSize]), internal::ploadu(&b[i+3*PacketSize])));
+ internal::pstore(&a[i+4*PacketSize], internal::padd(internal::ploadu(&a[i+4*PacketSize]), internal::ploadu(&b[i+4*PacketSize])));
+ internal::pstore(&a[i+5*PacketSize], internal::padd(internal::ploadu(&a[i+5*PacketSize]), internal::ploadu(&b[i+5*PacketSize])));
+ internal::pstore(&a[i+6*PacketSize], internal::padd(internal::ploadu(&a[i+6*PacketSize]), internal::ploadu(&b[i+6*PacketSize])));
+ internal::pstore(&a[i+7*PacketSize], internal::padd(internal::ploadu(&a[i+7*PacketSize]), internal::ploadu(&b[i+7*PacketSize])));
+ }
+}