aboutsummaryrefslogtreecommitdiff
path: root/bench/perf_monitoring/gemm
diff options
context:
space:
mode:
Diffstat (limited to 'bench/perf_monitoring/gemm')
-rw-r--r--bench/perf_monitoring/gemm/changesets.txt61
-rw-r--r--bench/perf_monitoring/gemm/gemm.cpp67
-rw-r--r--bench/perf_monitoring/gemm/gemm_settings.txt15
-rw-r--r--bench/perf_monitoring/gemm/lazy_gemm.cpp98
-rw-r--r--bench/perf_monitoring/gemm/lazy_gemm_settings.txt15
-rwxr-xr-xbench/perf_monitoring/gemm/make_plot.sh38
-rwxr-xr-xbench/perf_monitoring/gemm/run.sh156
7 files changed, 0 insertions, 450 deletions
diff --git a/bench/perf_monitoring/gemm/changesets.txt b/bench/perf_monitoring/gemm/changesets.txt
deleted file mode 100644
index af8eb9b8f..000000000
--- a/bench/perf_monitoring/gemm/changesets.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-#3.0.1
-#3.1.1
-#3.2.0
-3.2.4
-#5745:37f59e65eb6c
-5891:d8652709345d # introduce AVX
-#5893:24b4dc92c6d3 # merge
-5895:997c2ef9fc8b # introduce FMA
-#5904:e1eafd14eaa1 # complex and AVX
-5908:f8ee3c721251 # improve packing with ptranspose
-#5921:ca808bb456b0 # merge
-#5927:8b1001f9e3ac
-5937:5a4ca1ad8c53 # New gebp kernel handling up to 3 packets x 4 register-level blocks
-#5949:f3488f4e45b2 # merge
-#5969:e09031dccfd9 # Disable 3pX4 kernel on Altivec
-#5992:4a429f5e0483 # merge
-before-evaluators
-#6334:f6a45e5b8b7c # Implement evaluator for sparse outer products
-#6639:c9121c60b5c7
-#6655:06f163b5221f # Properly detect FMA support on ARM
-#6677:700e023044e7 # FMA has been wrongly disabled
-#6681:11d31dafb0e3
-#6699:5e6e8e10aad1 # merge default to tensors
-#6726:ff2d2388e7b9 # merge default to tensors
-#6742:0cbd6195e829 # merge default to tensors
-#6747:853d2bafeb8f # Generalized the gebp apis
-6765:71584fd55762 # Made the blocking computation aware of the l3 cache; Also optimized the blocking parameters to take into account the number of threads used for a computation
-#6781:9cc5a931b2c6 # generalized gemv
-#6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product
-#6844:039efd86b75c # merge tensor
-6845:7333ed40c6ef # change prefetching in gebp
-#6856:b5be5e10eb7f # merge index conversion
-#6893:c3a64aba7c70 # clean blocking size computation
-#6898:6fb31ebe6492 # rotating kernel for ARM
-6899:877facace746 # rotating kernel for ARM only
-#6904:c250623ae9fa # result_of
-6921:915f1b1fc158 # fix prefetching change for ARM
-6923:9ff25f6dacc6 # prefetching
-6933:52572e60b5d3 # blocking size strategy
-6937:c8c042f286b2 # avoid redundant pack_rhs
-6981:7e5d6f78da59 # dynamic loop swapping
-6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache
-6986:a675d05b6f8f # blocking heuristic: block on the rhs in L1 if the lhs fit in L1.
-7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5)
-7015:8aad8f35c955 # Refactor computeProductBlockingSizes to make room for the possibility of using lookup tables
-7016:a58d253e8c91 # Polish lookup tables generation
-7018:9b27294a8186 # actual_panel_rows computation should always be resilient to parameters not consistent with the known L1 cache size, see comment
-7019:c758b1e2c073 # Provide a empirical lookup table for blocking sizes measured on a Nexus 5. Only for float, only for Android on ARM 32bit for now.
-7085:627e039fba68 # Bug 986: add support for coefficient-based product with 0 depth.
-7098:b6f1db9cf9ec # Bug 992: don't select a 3p GEMM path with non-vectorizable scalar types, this hits unsupported paths in symm/triangular products code
-7591:09a8e2186610 # 3.3-alpha1
-7650:b0f3c8f43025 # help clang inlining
-#8744:74b789ada92a # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs)
-8789:efcb912e4356 # Made the index type a template parameter to evaluateProductBlockingSizes. Use numext::mini and numext::maxi instead of std::min/std::max to compute blocking sizes
-8972:81d53c711775 # Don't optimize the processing of the last rows of a matrix matrix product in cases that violate the assumptions made by the optimized code path
-8985:d935df21a082 # Remove the rotating kernel.
-8988:6c2dc56e73b3 # Bug 256: enable vectorization with unaligned loads/stores.
-9148:b8b8c421e36c # Relax mixing-type constraints for binary coefficient-wise operators
-9174:d228bc282ac9 # merge
-9212:c90098affa7b # Fix performance regression introduced in changeset 8aad8f35c955
-9213:9f1c14e4694b # Fix performance regression in dgemm introduced by changeset 81d53c711775
diff --git a/bench/perf_monitoring/gemm/gemm.cpp b/bench/perf_monitoring/gemm/gemm.cpp
deleted file mode 100644
index 614bd4737..000000000
--- a/bench/perf_monitoring/gemm/gemm.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <Eigen/Core>
-#include "../../BenchTimer.h"
-using namespace Eigen;
-
-#ifndef SCALAR
-#error SCALAR must be defined
-#endif
-
-typedef SCALAR Scalar;
-
-typedef Matrix<Scalar,Dynamic,Dynamic> Mat;
-
-EIGEN_DONT_INLINE
-void gemm(const Mat &A, const Mat &B, Mat &C)
-{
- C.noalias() += A * B;
-}
-
-EIGEN_DONT_INLINE
-double bench(long m, long n, long k)
-{
- Mat A(m,k);
- Mat B(k,n);
- Mat C(m,n);
- A.setRandom();
- B.setRandom();
- C.setZero();
-
- BenchTimer t;
-
- double up = 1e8*4/sizeof(Scalar);
- double tm0 = 4, tm1 = 10;
- if(NumTraits<Scalar>::IsComplex)
- {
- up /= 4;
- tm0 = 2;
- tm1 = 4;
- }
-
- double flops = 2. * m * n * k;
- long rep = std::max(1., std::min(100., up/flops) );
- long tries = std::max(tm0, std::min(tm1, up/flops) );
-
- BENCH(t, tries, rep, gemm(A,B,C));
-
- return 1e-9 * rep * flops / t.best();
-}
-
-int main(int argc, char **argv)
-{
- std::vector<double> results;
-
- std::ifstream settings("gemm_settings.txt");
- long m, n, k;
- while(settings >> m >> n >> k)
- {
- //std::cerr << " Testing " << m << " " << n << " " << k << std::endl;
- results.push_back( bench(m, n, k) );
- }
-
- std::cout << RowVectorXd::Map(results.data(), results.size());
-
- return 0;
-}
diff --git a/bench/perf_monitoring/gemm/gemm_settings.txt b/bench/perf_monitoring/gemm/gemm_settings.txt
deleted file mode 100644
index 5c43e1c7d..000000000
--- a/bench/perf_monitoring/gemm/gemm_settings.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-8 8 8
-9 9 9
-24 24 24
-239 239 239
-240 240 240
-2400 24 24
-24 2400 24
-24 24 2400
-24 2400 2400
-2400 24 2400
-2400 2400 24
-2400 2400 64
-4800 23 160
-23 4800 160
-2400 2400 2400
diff --git a/bench/perf_monitoring/gemm/lazy_gemm.cpp b/bench/perf_monitoring/gemm/lazy_gemm.cpp
deleted file mode 100644
index 6dc370155..000000000
--- a/bench/perf_monitoring/gemm/lazy_gemm.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <Eigen/Core>
-#include "../../BenchTimer.h"
-using namespace Eigen;
-
-#ifndef SCALAR
-#error SCALAR must be defined
-#endif
-
-typedef SCALAR Scalar;
-
-template<typename MatA, typename MatB, typename MatC>
-EIGEN_DONT_INLINE
-void lazy_gemm(const MatA &A, const MatB &B, MatC &C)
-{
-// escape((void*)A.data());
-// escape((void*)B.data());
- C.noalias() += A.lazyProduct(B);
-// escape((void*)C.data());
-}
-
-template<int m, int n, int k, int TA>
-EIGEN_DONT_INLINE
-double bench()
-{
- typedef Matrix<Scalar,m,k,TA> MatA;
- typedef Matrix<Scalar,k,n> MatB;
- typedef Matrix<Scalar,m,n> MatC;
-
- MatA A(m,k);
- MatB B(k,n);
- MatC C(m,n);
- A.setRandom();
- B.setRandom();
- C.setZero();
-
- BenchTimer t;
-
- double up = 1e7*4/sizeof(Scalar);
- double tm0 = 10, tm1 = 20;
-
- double flops = 2. * m * n * k;
- long rep = std::max(10., std::min(10000., up/flops) );
- long tries = std::max(tm0, std::min(tm1, up/flops) );
-
- BENCH(t, tries, rep, lazy_gemm(A,B,C));
-
- return 1e-9 * rep * flops / t.best();
-}
-
-template<int m, int n, int k>
-double bench_t(int t)
-{
- if(t)
- return bench<m,n,k,RowMajor>();
- else
- return bench<m,n,k,0>();
-}
-
-EIGEN_DONT_INLINE
-double bench_mnk(int m, int n, int k, int t)
-{
- int id = m*10000 + n*100 + k;
- switch(id) {
- case 10101 : return bench_t< 1, 1, 1>(t); break;
- case 20202 : return bench_t< 2, 2, 2>(t); break;
- case 30303 : return bench_t< 3, 3, 3>(t); break;
- case 40404 : return bench_t< 4, 4, 4>(t); break;
- case 50505 : return bench_t< 5, 5, 5>(t); break;
- case 60606 : return bench_t< 6, 6, 6>(t); break;
- case 70707 : return bench_t< 7, 7, 7>(t); break;
- case 80808 : return bench_t< 8, 8, 8>(t); break;
- case 90909 : return bench_t< 9, 9, 9>(t); break;
- case 101010 : return bench_t<10,10,10>(t); break;
- case 111111 : return bench_t<11,11,11>(t); break;
- case 121212 : return bench_t<12,12,12>(t); break;
- }
- return 0;
-}
-
-int main(int argc, char **argv)
-{
- std::vector<double> results;
-
- std::ifstream settings("lazy_gemm_settings.txt");
- long m, n, k, t;
- while(settings >> m >> n >> k >> t)
- {
- //std::cerr << " Testing " << m << " " << n << " " << k << std::endl;
- results.push_back( bench_mnk(m, n, k, t) );
- }
-
- std::cout << RowVectorXd::Map(results.data(), results.size());
-
- return 0;
-}
diff --git a/bench/perf_monitoring/gemm/lazy_gemm_settings.txt b/bench/perf_monitoring/gemm/lazy_gemm_settings.txt
deleted file mode 100644
index 407d5d4fa..000000000
--- a/bench/perf_monitoring/gemm/lazy_gemm_settings.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-1 1 1 0
-2 2 2 0
-3 3 3 0
-4 4 4 0
-4 4 4 1
-5 5 5 0
-6 6 6 0
-7 7 7 0
-7 7 7 1
-8 8 8 0
-9 9 9 0
-10 10 10 0
-11 11 11 0
-12 12 12 0
-12 12 12 1
diff --git a/bench/perf_monitoring/gemm/make_plot.sh b/bench/perf_monitoring/gemm/make_plot.sh
deleted file mode 100755
index cd3214ac9..000000000
--- a/bench/perf_monitoring/gemm/make_plot.sh
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/bin/bash
-
-# base name of the bench
-# it reads $1.out
-# and generates $1.pdf
-WHAT=$1
-bench=$2
-
-header="rev "
-while read line
-do
- if [ ! -z '$line' ]; then
- header="$header \"$line\""
- fi
-done < $bench"_settings.txt"
-
-echo $header > $WHAT.out.header
-cat $WHAT.out >> $WHAT.out.header
-
-
-echo "set title '$WHAT'" > $WHAT.gnuplot
-echo "set key autotitle columnhead outside " >> $WHAT.gnuplot
-echo "set xtics rotate 1" >> $WHAT.gnuplot
-
-echo "set term pdf color rounded enhanced fontscale 0.35 size 7in,5in" >> $WHAT.gnuplot
-echo set output "'"$WHAT.pdf"'" >> $WHAT.gnuplot
-
-col=`cat $bench"_settings.txt" | wc -l`
-echo "plot for [col=2:$col+1] '$WHAT.out.header' using 0:col:xticlabels(1) with lines" >> $WHAT.gnuplot
-echo " " >> $WHAT.gnuplot
-
-gnuplot -persist < $WHAT.gnuplot
-
-# generate a png file
-# convert -background white -density 120 -rotate 90 -resize 800 +dither -colors 256 -quality 0 $WHAT.ps -background white -flatten .$WHAT.png
-
-# clean
-rm $WHAT.out.header $WHAT.gnuplot \ No newline at end of file
diff --git a/bench/perf_monitoring/gemm/run.sh b/bench/perf_monitoring/gemm/run.sh
deleted file mode 100755
index 9d6ee40bc..000000000
--- a/bench/perf_monitoring/gemm/run.sh
+++ /dev/null
@@ -1,156 +0,0 @@
-#!/bin/bash
-
-# ./run.sh gemm
-# ./run.sh lazy_gemm
-
-# Examples of environment variables to be set:
-# PREFIX="haswell-fma-"
-# CXX_FLAGS="-mfma"
-
-# Options:
-# -up : enforce the recomputation of existing data, and keep best results as a merging strategy
-# -s : recompute selected changesets only and keep bests
-
-bench=$1
-
-if echo "$*" | grep '\-up' > /dev/null; then
- update=true
-else
- update=false
-fi
-
-if echo "$*" | grep '\-s' > /dev/null; then
- selected=true
-else
- selected=false
-fi
-
-global_args="$*"
-
-if [ $selected == true ]; then
- echo "Recompute selected changesets only and keep bests"
-elif [ $update == true ]; then
- echo "(Re-)Compute all changesets and keep bests"
-else
- echo "Skip previously computed changesets"
-fi
-
-
-
-if [ ! -d "eigen_src" ]; then
- hg clone https://bitbucket.org/eigen/eigen eigen_src
-else
- cd eigen_src
- hg pull -u
- cd ..
-fi
-
-if [ ! -z '$CXX' ]; then
- CXX=g++
-fi
-
-function make_backup
-{
- if [ -f "$1.out" ]; then
- mv "$1.out" "$1.backup"
- fi
-}
-
-function merge
-{
- count1=`echo $1 | wc -w`
- count2=`echo $2 | wc -w`
-
- if [ $count1 == $count2 ]; then
- a=( $1 ); b=( $2 )
- res=""
- for (( i=0 ; i<$count1 ; i++ )); do
- ai=${a[$i]}; bi=${b[$i]}
- tmp=`echo "if ($ai > $bi) $ai else $bi " | bc -l`
- res="$res $tmp"
- done
- echo $res
-
- else
- echo $1
- fi
-}
-
-function test_current
-{
- rev=$1
- scalar=$2
- name=$3
-
- prev=""
- if [ -e "$name.backup" ]; then
- prev=`grep $rev "$name.backup" | cut -c 14-`
- fi
- res=$prev
- count_rev=`echo $prev | wc -w`
- count_ref=`cat $bench"_settings.txt" | wc -l`
- if echo "$global_args" | grep "$rev" > /dev/null; then
- rev_found=true
- else
- rev_found=false
- fi
-# echo $update et $selected et $rev_found because $rev et "$global_args"
-# echo $count_rev et $count_ref
- if [ $update == true ] || [ $count_rev != $count_ref ] || ([ $selected == true ] && [ $rev_found == true ]); then
- if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src $bench.cpp -DSCALAR=$scalar -o $name; then
- curr=`./$name`
- if [ $count_rev == $count_ref ]; then
- echo "merge previous $prev"
- echo "with new $curr"
- else
- echo "got $curr"
- fi
- res=`merge "$curr" "$prev"`
-# echo $res
- echo "$rev $res" >> $name.out
- else
- echo "Compilation failed, skip rev $rev"
- fi
- else
- echo "Skip existing results for $rev / $name"
- echo "$rev $res" >> $name.out
- fi
-}
-
-make_backup $PREFIX"s"$bench
-make_backup $PREFIX"d"$bench
-make_backup $PREFIX"c"$bench
-
-cut -f1 -d"#" < changesets.txt | grep -E '[[:alnum:]]' | while read rev
-do
- if [ ! -z '$rev' ]; then
- echo "Testing rev $rev"
- cd eigen_src
- hg up -C $rev > /dev/null
- actual_rev=`hg identify | cut -f1 -d' '`
- cd ..
-
- test_current $actual_rev float $PREFIX"s"$bench
- test_current $actual_rev double $PREFIX"d"$bench
- test_current $actual_rev "std::complex<double>" $PREFIX"c"$bench
- fi
-
-done
-
-echo "Float:"
-cat $PREFIX"s""$bench.out"
-echo " "
-
-echo "Double:"
-cat $PREFIX"d""$bench.out"
-echo ""
-
-echo "Complex:"
-cat $PREFIX"c""$bench.out"
-echo ""
-
-./make_plot.sh $PREFIX"s"$bench $bench
-./make_plot.sh $PREFIX"d"$bench $bench
-./make_plot.sh $PREFIX"c"$bench $bench
-
-