ADD: new track message, Entity class and Position class

2022-12-20 17:20:35 +01:00
parent 469ecfb099
commit 98ebb563a8
2114 changed files with 482360 additions and 24 deletions
--- a/libs/eigen/bench/perf_monitoring/changesets.txt
+++ b/libs/eigen/bench/perf_monitoring/changesets.txt
@@ -0,0 +1,95 @@
+Load hg-to-git hash maps from ./eigen_git/.git/
+#3.0.1
+#3.1.1
+#3.2.0
+3.2.4
+#574a7621809fe
+58964a85800bd  # introduce AVX
+#589cbd7e98174  # merge
+589db7d49efbb  # introduce FMA
+#590a078f442a3  # complex and AVX
+590a419cea4a0  # improve packing with ptranspose
+#59251e85c936d  # merge
+#592e497a27ddc
+593d5a795f673  # New gebp kernel: up to 3 packets x 4 register-level blocks
+#5942c3c95990d  # merge
+#596c9788d55b9  # Disable 3pX4 kernel on Altivec
+#5999aa3dc4e21  # merge
+6209452eb38f8   # before-evaluators
+#6333eba5e1101  # Implement evaluator for sparse outer products
+#663b9d314ae19
+#6655ef95fabee  # Properly detect FMA support on ARM
+#667fe25f3b8e3   # FMA has been wrongly disabled
+#668409547a0c8
+#6694304c73542   # merge default to tensors
+#67216047c8d4a   # merge default to tensors
+#67410a79ca3a3   # merge default to tensors
+#674b7271dffb5   # Generalized the gebp apis
+676bfdd9f3ac9   # Made the blocking computation aware of the l3 cache;<br/> Also optimized the blocking parameters to take<br/> into account the number of threads used for a computation.
+6782dde63499c   # generalized gemv
+6799f98650d0a   # ensured that contractions that can be reduced to a matrix vector product
+#6840918c51e60   # merge tensor
+684e972b55ec4   # change prefetching in gebp
+#68598604576d1   # merge index conversion
+68963eb0f6fe6   # clean blocking size computation
+689db05f2d01e   # rotating kernel for ARM only
+#6901b7e12847d   # result_of
+69226275b250a   # fix prefetching change for ARM
+692692136350b   # prefetching
+693a8ad8887bf   # blocking size strategy
+693bcf9bb5c1f   # avoid redundant pack_rhs
+6987550107028   # dynamic loop swapping
+69858740ce4c6   # rm dynamic loop swapping,<br/> adjust lhs's micro panel height to fully exploit L1 cache
+698cd3bbffa73   # blocking heuristic:<br/> block on the rhs in L1 if the lhs fit in L1.
+701488c15615a   # organize a little our default cache sizes,<br/> and use a saner default L1 outside of x86 (10% faster on Nexus 5)
+701e56aabf205   # Refactor computeProductBlockingSizes to make room<br/> for the possibility of using lookup tables
+701ca5c12587b   # Polish lookup tables generation
+7013589a9c115   # actual_panel_rows computation should always be resilient<br/> to parameters not consistent with the known L1 cache size, see comment
+70102babb9c0f   # Provide a empirical lookup table for blocking sizes measured on a Nexus 5.<br/> Only for float, only for Android on ARM 32bit for now.
+7088481dc21ea   # Bug 986: add support for coefficient-based<br/> product with 0 depth.
+709d7f51feb07   # Bug 992: don't select a 3p GEMM path with non-SIMD scalar types.
+759f9303cc7c5   # 3.3-alpha1
+765aba1eda71e   # help clang inlining
+770fe630c9873   # Improve numerical accuracy in LLT and triangular solve<br/> by using true scalar divisions (instead of x * (1/y))
+#8741d23430628   # Improved the matrix multiplication blocking in the case<br/> where mr is not a power of 2 (e.g on Haswell CPUs)
+878f629fe95c8   # Made the index type a template parameter to evaluateProductBlockingSizes.<br/> Use numext::mini and numext::maxi instead of <br/> std::min/std::max to compute blocking sizes.
+8975d51a7f12c   # Don't optimize the processing of the last rows of<br/> a matrix matrix product in cases that violate<br/> the assumptions made by the optimized code path.
+8986136f4fdd4   # Remove the rotating kernel.
+898e68e165a23   # Bug 256: enable vectorization with unaligned loads/stores.
+91466e99ab6a1   # Relax mixing-type constraints for binary coeff-wise operators
+91776236cdea4   # merge
+917101ea26f5e   # Include the cost of stores in unrolling
+921672076db5d   # Fix perf regression introduced in changeset e56aabf205
+9210fa9e4a15c   # Fix perf regression in dgemm introduced by changeset 5d51a7f12c
+936f6b3cf8de9   # 3.3-beta2
+944504a4404f1   # Optimize expression matching 'd?=a-b*c' as 'd?=a; d?=b*c;'
+95877e27fbeee   # 3.3-rc1
+959779774f98c   # Bug 1311: fix alignment logic in some cases<br/> of (scalar*small).lazyProduct(small)
+9729f9d8d2f62   # Disabled part of the matrix matrix peeling code<br/> that's incompatible with 512 bit registers
+979eeac81b8c0   # 3.3.0
+989c927af60ed   # Fix a performance regression in (mat*mat)*vec<br/> for which mat*mat was evaluated multiple times.
+994fe696022ec   # Operators += and -= do not resize!
+99466f65ccc36   # Ease compiler generating clean and efficient code in mat*vec
+9946a5fe86098   # Complete rewrite of column-major-matrix * vector product<br/> to deliver higher performance of modern CPU.
+99591003f3b86   # Improve performance of row-major-dense-matrix * vector products<br/> for recent CPUs.
+997eb621413c1   # Revert vec/y to vec*(1/y) in row-major TRSM
+10444bbc320468  # Bug 1435: fix aliasing issue in expressions like: A = C - B*A;
+1073624df50945  # Adds missing EIGEN_STRONG_INLINE to support MSVC<br/> properly inlining small vector calculations
+1094d428a199ab  # Bug 1562: optimize evaluation of small products<br/> of the form s*A*B by rewriting them as: s*(A.lazyProduct(B))<br/> to save a costly temporary.<br/> Measured speedup from 2x to 5x.
+1096de9e31a06d  # Introduce the macro ei_declare_local_nested_eval to<br/> help allocating on the stack local temporaries via alloca,<br/> and let outer-products makes a good use of it.
+11087b91c11207  # Bug 1578: Improve prefetching in matrix multiplication on MIPS.
+1153aa110e681b  # PR 526: Speed up multiplication of small, dynamically sized matrices
+11544ad359237a  # Vectorize row-by-row gebp loop iterations on 16 packets as well
+1157a476054879  # Bug 1624: improve matrix-matrix product on ARM 64, 20% speedup
+1160a4159dba08  # do not read buffers out of bounds
+1163c53eececb0  # Implement AVX512 vectorization of std::complex<float/double>
+11644e7746fe22  # Bug 1636: fix gemm performance issue with gcc>=6 and no FMA
+1164956678a4ef  # Bug 1515: disable gebp's 3pX4 micro kernel<br/> for MSVC<=19.14 because of register spilling.
+1165426bce7529  # fix EIGEN_GEBP_2PX4_SPILLING_WORKAROUND<br/> for non vectorized type, and non x86/64 target
+11660d90637838  # enable spilling workaround on architectures with SSE/AVX
+1166f159cf3d75  # Artificially increase l1-blocking size for AVX512.<br/> +10% speedup with current kernels.
+11686dd93f7e3b  # Make code compile again for older compilers.
+1175dbfcceabf5  # Bug: 1633: refactor gebp kernel and optimize for neon
+117670e133333d  # Bug 1661: fix regression in GEBP and AVX512
+11760f028f61cb  # GEBP: cleanup logic to choose between<br/> a 4 packets of 1 packet (=e118ce86fd+fix)
+1180de77bf5d6c  # gebp: Add new ½ and ¼ packet rows per (peeling) round on the lhs
--- a/libs/eigen/bench/perf_monitoring/gemm.cpp
+++ b/libs/eigen/bench/perf_monitoring/gemm.cpp
@@ -0,0 +1,12 @@
+#include "gemm_common.h"
+
+// Benchmarked kernel: accumulates the matrix product A * B into C through
+// C.noalias(). The expression is the thing being measured, so it must stay
+// exactly as written. EIGEN_DONT_INLINE presumably keeps the kernel as an
+// out-of-line call inside the timing loop (TODO confirm against Eigen's macro).
+EIGEN_DONT_INLINE
+void gemm(const Mat &A, const Mat &B, Mat &C)
+{
+  C.noalias() += A * B;
+}
+
+// Entry point: the shared driver main_gemm() reads the settings and reports
+// the results; this file only supplies the kernel to time.
+int main(int argc, char **argv)
+{
+  const int exit_code = main_gemm(argc, argv, gemm);
+  return exit_code;
+}
--- a/libs/eigen/bench/perf_monitoring/gemm_common.h
+++ b/libs/eigen/bench/perf_monitoring/gemm_common.h
@@ -0,0 +1,67 @@
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <string>
+#include "eigen_src/Eigen/Core"
+#include "../BenchTimer.h"
+using namespace Eigen;
+
+#ifndef SCALAR
+#error SCALAR must be defined
+#endif
+
+typedef SCALAR Scalar;
+
+typedef Matrix<Scalar,Dynamic,Dynamic> Mat;
+
+// Times f(A,B,C) for an (m x k) * (k x n) problem with random A and B and a
+// zeroed C, and returns 1e-9 * rep * flops / best-time (plotted as GFlops).
+// The repetition and try counts are derived from a fixed work budget, so
+// larger problems are repeated fewer times; complex scalars use a quarter of
+// the budget and fewer tries.
+template<typename Func>
+EIGEN_DONT_INLINE
+double bench(long m, long n, long k, const Func& f)
+{
+  Mat A(m,k);
+  Mat B(k,n);
+  Mat C(m,n);
+  A.setRandom();
+  B.setRandom();
+  C.setZero();
+
+  const bool is_complex = NumTraits<Scalar>::IsComplex;
+
+  double budget = 1e8*4/sizeof(Scalar);
+  if(is_complex)
+    budget /= 4;
+  const double min_tries = is_complex ? 2 : 4;
+  const double max_tries = is_complex ? 4 : 10;
+
+  const double flops = 2. * m * n * k;
+  const double ratio = budget/flops;
+  // Clamp in double precision first, then truncate to an integer count.
+  const long rep = static_cast<long>(std::max(1., std::min(100., ratio)));
+  const long tries = static_cast<long>(std::max(min_tries, std::min(max_tries, ratio)));
+
+  BenchTimer timer;
+  BENCH(timer, tries, rep, f(A,B,C));
+
+  return 1e-9 * rep * flops / timer.best();
+}
+
+// Benchmark driver shared by the gemm-style programs.
+//
+// Reads whitespace-separated "m n k" triples from the settings file (argv[1]
+// when given, otherwise "gemm_settings.txt"), runs bench() on each triple and
+// prints all results to stdout as one RowVectorXd.
+//
+// Returns 0 on success and 1 when the settings file cannot be opened.
+template<typename Func>
+int main_gemm(int argc, char **argv, const Func& f)
+{
+  std::vector<double> results;
+  
+  std::string filename = std::string("gemm_settings.txt");
+  if(argc>1)
+    filename = std::string(argv[1]);
+  std::ifstream settings(filename);
+  if(!settings)
+  {
+    // Report the problem instead of silently printing an empty result.
+    std::cerr << "Error: cannot open settings file '" << filename << "'" << std::endl;
+    return 1;
+  }
+  long m, n, k;
+  while(settings >> m >> n >> k)
+  {
+    //std::cerr << "  Testing " << m << " " << n << " " << k << std::endl;
+    results.push_back( bench(m, n, k, f) );
+  }
+  
+  std::cout << RowVectorXd::Map(results.data(), results.size());
+  
+  return 0;
+}
--- a/libs/eigen/bench/perf_monitoring/gemm_settings.txt
+++ b/libs/eigen/bench/perf_monitoring/gemm_settings.txt
@@ -0,0 +1,15 @@
+8 8 8
+9 9 9
+24 24 24
+239 239 239
+240 240 240
+2400 24 24
+24 2400 24
+24 24 2400
+24 2400 2400
+2400 24 2400
+2400 2400 24
+2400 2400 64
+4800 23 160
+23 4800 160
+2400 2400 2400
--- a/libs/eigen/bench/perf_monitoring/gemm_square_settings.txt
+++ b/libs/eigen/bench/perf_monitoring/gemm_square_settings.txt
@@ -0,0 +1,11 @@
+8 8 8
+9 9 9
+12 12 12
+15 15 15
+16 16 16
+24 24 24
+102 102 102
+239 239 239
+240 240 240
+2400 2400 2400
+2463 2463 2463
--- a/libs/eigen/bench/perf_monitoring/gemv.cpp
+++ b/libs/eigen/bench/perf_monitoring/gemv.cpp
@@ -0,0 +1,12 @@
+#include "gemv_common.h"
+
+// Benchmarked kernel: accumulates the matrix-vector product A * B into C
+// through C.noalias(). The expression is the thing being measured, so it must
+// stay exactly as written.
+EIGEN_DONT_INLINE
+void gemv(const Mat &A, const Vec &B, Vec &C)
+{
+  C.noalias() += A * B;
+}
+
+// Entry point: the shared driver main_gemv() reads the settings and reports
+// the results; this file only supplies the kernel to time.
+int main(int argc, char **argv)
+{
+  const int exit_code = main_gemv(argc, argv, gemv);
+  return exit_code;
+}
--- a/libs/eigen/bench/perf_monitoring/gemv_common.h
+++ b/libs/eigen/bench/perf_monitoring/gemv_common.h
@@ -0,0 +1,69 @@
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <string>
+#include <functional>
+#include "eigen_src/Eigen/Core"
+#include "../BenchTimer.h"
+using namespace Eigen;
+
+#ifndef SCALAR
+#error SCALAR must be defined
+#endif
+
+typedef SCALAR Scalar;
+
+typedef Matrix<Scalar,Dynamic,Dynamic> Mat;
+typedef Matrix<Scalar,Dynamic,1>       Vec;
+
+// Times f(A,B,C) for an m x n matrix A, a length-n vector B and a length-m
+// vector C (all randomly filled) and returns 1e-9 * rep * flops / best-time
+// (plotted as GFlops). The repetition and try counts are derived from a fixed
+// work budget; complex scalars use a quarter of the budget and fewer tries.
+template<typename Func>
+EIGEN_DONT_INLINE
+double bench(long m, long n, Func &f)
+{
+  Mat A(m,n);
+  Vec B(n);
+  Vec C(m);
+  A.setRandom();
+  B.setRandom();
+  C.setRandom();
+
+  const bool is_complex = NumTraits<Scalar>::IsComplex;
+
+  double budget = 1e8/sizeof(Scalar);
+  if(is_complex)
+    budget /= 4;
+  const double min_tries = is_complex ? 2 : 4;
+  const double max_tries = is_complex ? 4 : 10;
+
+  const double flops = 2. * m * n;
+  const double ratio = budget/flops;
+  // Clamp in double precision first, then truncate to an integer count.
+  const long rep = static_cast<long>(std::max(1., std::min(100., ratio)));
+  const long tries = static_cast<long>(std::max(min_tries, std::min(max_tries, ratio)));
+
+  BenchTimer timer;
+  BENCH(timer, tries, rep, f(A,B,C));
+
+  return 1e-9 * rep * flops / timer.best();
+}
+
+// Benchmark driver shared by the gemv/trmv-style programs.
+//
+// Reads whitespace-separated "m n" pairs from the settings file (argv[1] when
+// given, otherwise "gemv_settings.txt"), runs bench() on each pair and prints
+// all results to stdout as one RowVectorXd.
+//
+// Returns 0 on success and 1 when the settings file cannot be opened.
+template<typename Func>
+int main_gemv(int argc, char **argv, Func& f)
+{
+  std::vector<double> results;
+
+  std::string filename = std::string("gemv_settings.txt");
+  if(argc>1)
+    filename = std::string(argv[1]);
+  std::ifstream settings(filename);
+  if(!settings)
+  {
+    // Report the problem instead of silently printing an empty result.
+    std::cerr << "Error: cannot open settings file '" << filename << "'" << std::endl;
+    return 1;
+  }
+  long m, n;
+  while(settings >> m >> n)
+  {
+    //std::cerr << "  Testing " << m << " " << n << std::endl;
+    results.push_back( bench(m, n, f) );
+  }
+
+  std::cout << RowVectorXd::Map(results.data(), results.size());
+
+  return 0;
+}
--- a/libs/eigen/bench/perf_monitoring/gemv_settings.txt
+++ b/libs/eigen/bench/perf_monitoring/gemv_settings.txt
@@ -0,0 +1,11 @@
+8 8
+9 9
+24 24
+239 239
+240 240
+2400 24
+24 2400
+24 240
+2400 2400
+4800 23
+23 4800
--- a/libs/eigen/bench/perf_monitoring/gemv_square_settings.txt
+++ b/libs/eigen/bench/perf_monitoring/gemv_square_settings.txt
@@ -0,0 +1,13 @@
+8 8
+9 9
+12 12
+15 15
+16 16
+24 24
+53 53
+74 74
+102 102
+239 239
+240 240
+2400 2400
+2463 2463
--- a/libs/eigen/bench/perf_monitoring/gemvt.cpp
+++ b/libs/eigen/bench/perf_monitoring/gemvt.cpp
@@ -0,0 +1,12 @@
+#include "gemv_common.h"
+
+// Benchmarked kernel: accumulates A^T * C into B through B.noalias().
+// The output is the second argument here (not the third as in gemv.cpp): the
+// shared driver calls f(A,B,C) with A of size m x n, B of length n and C of
+// length m, which is what the transposed product requires.
+EIGEN_DONT_INLINE
+void gemv(const Mat &A, Vec &B, const Vec &C)
+{
+  B.noalias() += A.transpose() * C;
+}
+
+// Entry point: the shared driver main_gemv() reads the settings and reports
+// the results; this file only supplies the transposed kernel to time.
+int main(int argc, char **argv)
+{
+  const int exit_code = main_gemv(argc, argv, gemv);
+  return exit_code;
+}
--- a/libs/eigen/bench/perf_monitoring/lazy_gemm.cpp
+++ b/libs/eigen/bench/perf_monitoring/lazy_gemm.cpp
@@ -0,0 +1,101 @@
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <Eigen/Core>
+#include "../../BenchTimer.h"
+using namespace Eigen;
+
+#ifndef SCALAR
+#error SCALAR must be defined
+#endif
+
+typedef SCALAR Scalar;
+
+// Benchmarked kernel: accumulates A.lazyProduct(B) into C through C.noalias().
+// The expression is the thing being measured, so it must stay exactly as
+// written. The commented-out escape() calls are disabled in the original and
+// are kept as-is; escape() is not defined in this file.
+template<typename MatA, typename MatB, typename MatC>
+EIGEN_DONT_INLINE
+void lazy_gemm(const MatA &A, const MatB &B, MatC &C)
+{
+//   escape((void*)A.data());
+//   escape((void*)B.data());
+  C.noalias() += A.lazyProduct(B);
+//   escape((void*)C.data());
+}
+
+// Times lazy_gemm() on fixed-size matrices: A is m x k with storage option TA,
+// B is k x n and C is m x n. A and B are random, C starts at zero.
+// Returns 1e-9 * rep * flops / best-time (plotted as GFlops).
+template<int m, int n, int k, int TA>
+EIGEN_DONT_INLINE
+double bench()
+{
+  typedef Matrix<Scalar,m,k,TA> MatA;
+  typedef Matrix<Scalar,k,n> MatB;
+  typedef Matrix<Scalar,m,n> MatC;
+
+  MatA A(m,k);
+  MatB B(k,n);
+  MatC C(m,n);
+  A.setRandom();
+  B.setRandom();
+  C.setZero();
+
+  const double budget = 1e7*4/sizeof(Scalar);
+  const double min_tries = 10;
+  const double max_tries = 20;
+
+  const double flops = 2. * m * n * k;
+  const double ratio = budget/flops;
+  // Clamp in double precision first, then truncate to an integer count.
+  const long rep = static_cast<long>(std::max(10., std::min(10000., ratio)));
+  const long tries = static_cast<long>(std::max(min_tries, std::min(max_tries, ratio)));
+
+  BenchTimer timer;
+  BENCH(timer, tries, rep, lazy_gemm(A,B,C));
+
+  return 1e-9 * rep * flops / timer.best();
+}
+
+// Selects the storage option of the left operand at run time: a non-zero t
+// benchmarks with RowMajor, zero uses option 0 (presumably Eigen's
+// column-major default -- TODO confirm).
+template<int m, int n, int k>
+double bench_t(int t)
+{
+  return (t != 0) ? bench<m,n,k,RowMajor>()
+                  : bench<m,n,k,0>();
+}
+
+// Maps run-time sizes to the matching compile-time instantiation of bench_t.
+// Only cubic problems with m == n == k in 1..12 are instantiated; any other
+// triple yields 0.
+//
+// The sizes are compared directly instead of being packed into
+// m*10000 + n*100 + k, because the packed id is ambiguous for out-of-range
+// sizes (e.g. (0,101,1) and (1,1,1) both give 10101) and would silently run
+// an unrelated kernel.
+EIGEN_DONT_INLINE
+double bench_mnk(int m, int n, int k, int t)
+{
+  if(m != n || n != k)
+    return 0;
+
+  switch(m) {
+    case  1 : return bench_t< 1, 1, 1>(t);
+    case  2 : return bench_t< 2, 2, 2>(t);
+    case  3 : return bench_t< 3, 3, 3>(t);
+    case  4 : return bench_t< 4, 4, 4>(t);
+    case  5 : return bench_t< 5, 5, 5>(t);
+    case  6 : return bench_t< 6, 6, 6>(t);
+    case  7 : return bench_t< 7, 7, 7>(t);
+    case  8 : return bench_t< 8, 8, 8>(t);
+    case  9 : return bench_t< 9, 9, 9>(t);
+    case 10 : return bench_t<10,10,10>(t);
+    case 11 : return bench_t<11,11,11>(t);
+    case 12 : return bench_t<12,12,12>(t);
+  }
+  return 0;
+}
+
+// Entry point of the lazy-product benchmark.
+//
+// Reads whitespace-separated "m n k t" records from the settings file (argv[1]
+// when given, otherwise "lazy_gemm_settings.txt"), runs bench_mnk() on each
+// record and prints all results to stdout as one RowVectorXd.
+//
+// Returns 0 on success and 1 when the settings file cannot be opened.
+int main(int argc, char **argv)
+{
+  std::vector<double> results;
+  
+  std::string filename = std::string("lazy_gemm_settings.txt");
+  if(argc>1)
+    filename = std::string(argv[1]);
+  std::ifstream settings(filename);
+  if(!settings)
+  {
+    // Report the problem instead of silently printing an empty result.
+    std::cerr << "Error: cannot open settings file '" << filename << "'" << std::endl;
+    return 1;
+  }
+  long m, n, k, t;
+  while(settings >> m >> n >> k >> t)
+  {
+    //std::cerr << "  Testing " << m << " " << n << " " << k << std::endl;
+    results.push_back( bench_mnk(m, n, k, t) );
+  }
+  
+  std::cout << RowVectorXd::Map(results.data(), results.size());
+  
+  return 0;
+}
--- a/libs/eigen/bench/perf_monitoring/lazy_gemm_settings.txt
+++ b/libs/eigen/bench/perf_monitoring/lazy_gemm_settings.txt
@@ -0,0 +1,15 @@
+1 1 1 0
+2 2 2 0
+3 3 3 0
+4 4 4 0
+4 4 4 1
+5 5 5 0
+6 6 6 0
+7 7 7 0
+7 7 7 1
+8 8 8 0
+9 9 9 0
+10 10 10 0
+11 11 11 0
+12 12 12 0
+12 12 12 1
--- a/libs/eigen/bench/perf_monitoring/llt.cpp
+++ b/libs/eigen/bench/perf_monitoring/llt.cpp
@@ -0,0 +1,15 @@
+#include "gemm_common.h"
+#include <Eigen/Cholesky>
+
+// Benchmarked kernel: copies A into C, adds 1000 to every diagonal entry of C
+// (presumably so the random input is safely positive definite -- TODO
+// confirm), then runs the blocked variant of Eigen's internal llt_inplace on
+// C. B is unused; it is only present so the function matches the f(A,B,C)
+// call made by the shared gemm driver.
+EIGEN_DONT_INLINE
+void llt(const Mat &A, const Mat &B, Mat &C)
+{
+  C = A;
+  C.diagonal().array() += 1000;
+  Eigen::internal::llt_inplace<Mat::Scalar, Lower>::blocked(C);
+}
+
+// Entry point: reuses the gemm driver (m n k settings) with the llt kernel.
+int main(int argc, char **argv)
+{
+  const int exit_code = main_gemm(argc, argv, llt);
+  return exit_code;
+}
--- a/libs/eigen/bench/perf_monitoring/make_plot.sh
+++ b/libs/eigen/bench/perf_monitoring/make_plot.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+
+# base name of the bench
+# it reads $1.out
+# and generates $1.pdf
+WHAT=$1
+bench=$2
+settings_file=$3
+
+header="rev "
+while read line
+do
+  if [ ! -z '$line' ]; then
+    header="$header  \"$line\""
+  fi
+done < $settings_file
+
+echo $header > $WHAT.out.header
+cat $WHAT.out >> $WHAT.out.header
+
+
+echo "set title '$WHAT'" > $WHAT.gnuplot
+echo "set key autotitle columnhead outside " >> $WHAT.gnuplot
+echo "set xtics rotate 1" >> $WHAT.gnuplot
+
+echo "set term pdf color rounded enhanced fontscale 0.35 size 7in,5in" >> $WHAT.gnuplot
+echo set output "'"$WHAT.pdf"'" >> $WHAT.gnuplot
+
+col=`cat $settings_file | wc -l`
+echo "plot for [col=2:$col+1] '$WHAT.out.header' using 0:col:xticlabels(1) with lines" >> $WHAT.gnuplot
+echo " " >>  $WHAT.gnuplot
+
+gnuplot -persist < $WHAT.gnuplot
+
+# generate a png file (thumbnail)
+convert -colors 256 -background white -density 300 -resize 300  -quality 0 $WHAT.pdf -background white -flatten $WHAT.png
+
+# clean
+rm $WHAT.out.header $WHAT.gnuplot
+
+
+# generate html/svg graph
+
+echo " " > $WHAT.html
+cat resources/chart_header.html > $WHAT.html
+echo 'var customSettings = {"TITLE":"","SUBTITLE":"","XLABEL":"","YLABEL":""};' >> $WHAT.html
+#  'data' is an array of datasets (i.e. curves), each of which is an object of the form
+#  {
+#    key: <name of the curve>,
+#    color: <optional color of the curve>,
+#    values: [{
+#        r: <revision number>,
+#        v: <GFlops>
+#    }]
+#  }
+echo 'var data = [' >> $WHAT.html
+
+col=2
+while read line
+do
+  if [ ! -z '$line' ]; then
+    header="$header  \"$line\""
+    echo '{"key":"'$line'","values":[' >> $WHAT.html
+    i=0
+    while read line2
+    do
+      if [ ! -z "$line2" ]; then
+        val=`echo $line2 | cut -s -f $col -d ' '`
+        if [ -n "$val" ]; then # skip build failures
+          echo '{"r":'$i',"v":'$val'},' >> $WHAT.html
+        fi
+      fi
+      ((i++))
+    done < $WHAT.out
+    echo ']},'  >> $WHAT.html
+  fi
+  ((col++))
+done < $settings_file
+echo '];'  >> $WHAT.html
+
+echo 'var changesets = [' >> $WHAT.html
+while read line2
+do
+  if [ ! -z '$line2' ]; then
+    echo '"'`echo $line2 | cut -f 1 -d ' '`'",' >> $WHAT.html
+  fi
+done < $WHAT.out
+echo '];'  >> $WHAT.html
+
+echo 'var changesets_details = [' >> $WHAT.html
+while read line2
+do
+  if [ ! -z '$line2' ]; then
+    num=`echo "$line2" | cut -f 1 -d ' '`
+    comment=`grep ":$num" changesets.txt | cut -f 2 -d '#'`
+    echo '"'"$comment"'",' >> $WHAT.html
+  fi
+done < $WHAT.out
+echo '];'  >> $WHAT.html
+
+echo 'var changesets_count = [' >> $WHAT.html
+i=0
+while read line2
+do
+  if [ ! -z '$line2' ]; then
+    echo $i ',' >> $WHAT.html
+  fi
+  ((i++))
+done < $WHAT.out
+echo '];'  >> $WHAT.html
+
+cat resources/chart_footer.html >> $WHAT.html
--- a/libs/eigen/bench/perf_monitoring/resources/chart_footer.html
+++ b/libs/eigen/bench/perf_monitoring/resources/chart_footer.html
@@ -0,0 +1,41 @@
+      /* Second half of the generated chart page. It expects the variables
+         customSettings, data, changesets, changesets_details and
+         changesets_count, which make_plot.sh writes just before this file. */
+      /* setup the chart and its options */
+      var chart = nv.models.lineChart()
+                    .color(d3.scale.category10().range())
+                    .margin({left: 75, bottom: 100})
+                    .forceX([0]).forceY([0]);
+
+      /* x value of a point is its 'r' field (row index of the changeset) */
+      chart.x(function(datum){ return datum.r; })
+           .xAxis.options({
+             axisLabel: customSettings.XLABEL || 'Changeset',
+             tickFormat: d3.format('.0f')
+           });
+      /* one tick per changeset, labelled with the changeset id; this
+         tickFormat call comes after the numeric one set just above */
+      chart.xAxis
+        .tickValues(changesets_count)
+        .tickFormat(function(d){return changesets[d]})
+        .rotateLabels(-90);
+
+      /* y value of a point is its 'v' field */
+      chart.y(function(datum){ return datum.v; })
+            .yAxis.options({
+              axisLabel: customSettings.YLABEL || 'GFlops'/*,
+              tickFormat: function(val){ return d3.format('.0f')(val) + ' GFlops'; }*/
+            });
+
+      /* tooltip header: changeset id followed by its description */
+      chart.tooltip.headerFormatter(function(d) { return changesets[d]
+        + ' <p style="font-weight:normal;text-align: left;">'
+        + changesets_details[d] + "</p>"; });
+
+      //chart.useInteractiveGuideline(true);
+      d3.select('#chart').datum(data).call(chart);
+      var plot = d3.select('#chart > g');
+
+      /* setup the title */
+      plot.append('text')
+          .style('font-size', '24px')
+          .attr('text-anchor', 'middle').attr('x', '50%').attr('y', '20px')
+          .text(customSettings.TITLE || '');
+
+      /* ensure the chart is responsive */
+      nv.utils.windowResize(chart.update);
+    </script>                                                                                                              
+  </body>                                                                                                                  
+</html>  
--- a/libs/eigen/bench/perf_monitoring/resources/chart_header.html
+++ b/libs/eigen/bench/perf_monitoring/resources/chart_header.html
--- a/libs/eigen/bench/perf_monitoring/resources/footer.html
+++ b/libs/eigen/bench/perf_monitoring/resources/footer.html
@@ -0,0 +1,3 @@
+</table>
+</body>
+</html>
--- a/libs/eigen/bench/perf_monitoring/resources/header.html
+++ b/libs/eigen/bench/perf_monitoring/resources/header.html
@@ -0,0 +1,42 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<title>Eigen performance monitoring</title>
+<style type="text/css">
+
+body
+{
+ background:#fff;
+}
+th {
+
+}
+img
+{
+ width:auto;
+ box-shadow:0px 0px 20px #cecece;
+ margin: 20px 20px  20px  20px;
+  -moz-transform: scale(1);
+ -moz-transition-duration: 0.4s;
+ -webkit-transition-duration: 0.4s;
+ -webkit-transform: scale(1);
+
+ -ms-transform: scale(1);
+ -ms-transition-duration: 0.4s;
+}
+img:hover
+{
+box-shadow: 5px 5px 20px #dcdcdc;
+ -moz-transform: scale(1.1);
+ -moz-transition-duration: 0.4s;
+ -webkit-transition-duration: 0.4s;
+ -webkit-transform: scale(1.1);
+
+ -ms-transform: scale(1.1);
+ -ms-transition-duration: 0.4s;
+
+}
+</style>
+</head>
+<body>
--- a/libs/eigen/bench/perf_monitoring/resources/s1.js
+++ b/libs/eigen/bench/perf_monitoring/resources/s1.js
--- a/libs/eigen/bench/perf_monitoring/resources/s2.js
+++ b/libs/eigen/bench/perf_monitoring/resources/s2.js
--- a/libs/eigen/bench/perf_monitoring/run.sh
+++ b/libs/eigen/bench/perf_monitoring/run.sh
@@ -0,0 +1,183 @@
+#!/bin/bash
+
+# ./run.sh gemm gemm_settings.txt
+# ./run.sh lazy_gemm lazy_gemm_settings.txt
+# ./run.sh gemv gemv_settings.txt
+# ./run.sh trmv_up gemv_square_settings.txt
+# ...
+
+# Examples of environment variables to be set:
+#   PREFIX="haswell-fma-"
+#   CXX_FLAGS="-mfma"
+#   CXX=clang++
+
+# Options:
+#   -up : enforce the recomputation of existing data, and keep best results as a merging strategy
+#   -s  : recompute selected changesets only and keep bests
+#   -np : no plotting of results, just generate the data
+
+bench=$1
+settings_file=$2
+
+# Each flag starts at its "not given" value and is flipped when its text
+# occurs anywhere in the argument list. This is a substring match, so '-s'
+# also matches any argument that merely contains "-s".
+update=false
+if [[ "$*" =~ '-up' ]]; then update=true; fi
+
+selected=false
+if [[ "$*" =~ '-s' ]]; then selected=true; fi
+
+do_plot=true
+if [[ "$*" =~ '-np' ]]; then do_plot=false; fi
+
+
+# Results are stored in a directory named after $PREFIX, or in "default" when
+# PREFIX is unset or empty. ':-' substitutes the fallback value; ':?' would
+# instead abort the script with "default" as the error message and make the
+# empty-PREFIX branch below unreachable.
+WORKING_DIR=${PREFIX:-"default"}
+
+# File names inside the directory additionally carry "$PREFIX-" when a prefix
+# was given.
+if [ -z "$PREFIX" ]; then
+  WORKING_DIR_PREFIX="$WORKING_DIR/"
+else
+  WORKING_DIR_PREFIX="$WORKING_DIR/$PREFIX-"
+fi
+echo "WORKING_DIR_PREFIX=$WORKING_DIR_PREFIX"
+mkdir -p $WORKING_DIR
+
+global_args="$*"
+
+if $selected ; then
+ echo "Recompute selected changesets only and keep bests"
+elif $update ; then
+ echo "(Re-)Compute all changesets and keep bests"
+else
+ echo "Skip previously computed changesets"
+fi
+
+
+
+if [ ! -d "eigen_src" ]; then
+  git clone https://gitlab.com/libeigen/eigen.git eigen_src
+else
+  cd eigen_src
+  git pull
+  cd ..
+fi
+
+if [ -z "$CXX" ]; then
+  CXX=g++
+fi
+
+# Renames "$1.out" to "$1.backup" when the former exists, so that a fresh
+# .out file can be written while the previous results remain available.
+function make_backup
+{
+  if [ ! -f "$1.out" ]; then
+    return
+  fi
+  mv "$1.out" "$1.backup"
+}
+
+function merge
+{
+  count1=`echo $1 |  wc -w`
+  count2=`echo $2 |  wc -w`
+  
+  if [ $count1 == $count2 ]; then
+    a=( $1 ); b=( $2 )
+    res=""
+    for (( i=0 ; i<$count1 ; i++ )); do
+      ai=${a[$i]}; bi=${b[$i]}
+      tmp=`echo "if ($ai > $bi) $ai else $bi " | bc -l`
+      res="$res $tmp"
+    done
+    echo $res
+
+  else
+    echo $1
+  fi
+}
+
+# Benchmarks the currently checked-out Eigen revision for one scalar type and
+# appends a "<rev> <results...>" line to "$name.out".
+#   $1 rev     revision id used as the row key
+#   $2 scalar  value passed to the compiler as -DSCALAR
+#   $3 name    output base name; also the path of the compiled binary
+# Reads the globals settings_file, bench, update, selected, global_args, CXX
+# and CXX_FLAGS set earlier in this script.
+function test_current 
+{
+  rev=$1
+  scalar=$2
+  name=$3
+  
+  # Previous results for this revision, if a backup file exists.
+  prev=""
+  if [ -e "$name.backup" ]; then
+    prev=`grep $rev "$name.backup" | cut -d ' ' -f 2-`
+  fi
+  res=$prev
+  # Previous results count as complete when there is one value per line of
+  # the settings file.
+  count_rev=`echo $prev |  wc -w`
+  count_ref=`cat $settings_file |  wc -l`
+  # Was this revision named on the command line (relevant for -s)?
+  if echo "$global_args" | grep "$rev" > /dev/null; then
+    rev_found=true
+  else
+    rev_found=false
+  fi
+#  echo $update and $selected and $rev_found because $rev and "$global_args"
+#  echo $count_rev and $count_ref
+  # Re-run when forced (-up), when previous results are missing or incomplete,
+  # or when this revision was explicitly selected (-s).
+  if $update || [ $count_rev != $count_ref ] || ( $selected &&  $rev_found ); then
+    echo "RUN: $CXX -O3 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src $bench.cpp -DSCALAR=$scalar -o $name"
+    if $CXX -O3 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src $bench.cpp -DSCALAR=$scalar -o $name; then
+      curr=`./$name $settings_file`
+      if [ $count_rev == $count_ref ]; then
+        echo "merge previous $prev"
+        echo "with new       $curr"
+      else
+        echo "got            $curr"
+      fi
+      # merge keeps the element-wise best of the new and previous results.
+      res=`merge "$curr" "$prev"`
+#       echo $res
+      echo "$rev $res" >> $name.out
+    else
+      # No line is written for this revision when compilation fails.
+      echo "Compilation failed, skip rev $rev"
+    fi
+  else
+    echo "Skip existing results for $rev / $name"
+    echo "$rev $res" >> $name.out
+  fi
+}
+
+# Move the previous result files aside so test_current can reuse/merge them.
+make_backup $WORKING_DIR_PREFIX"s"$bench
+make_backup $WORKING_DIR_PREFIX"d"$bench
+make_backup $WORKING_DIR_PREFIX"c"$bench
+
+# Walk over the active entries of changesets.txt: everything after '#' is
+# dropped and lines without any alphanumeric character are filtered out.
+cut -f1 -d"#" < changesets.txt | grep -E '[[:alnum:]]' | while read rev
+do
+  # Double quotes are required here: a single-quoted '$rev' is a literal
+  # string and therefore never empty.
+  if [ -n "$rev" ]; then
+    # Keep only the part after ':' when the entry has one.
+    rev2=`echo $rev | cut -f 2 -d':'`
+    echo "Testing rev $rev, $rev2"
+    cd eigen_src
+    git checkout $rev2 > /dev/null
+    actual_rev=`git rev-parse --short HEAD`
+    cd ..
+    
+    test_current $actual_rev float                  $WORKING_DIR_PREFIX"s"$bench
+    test_current $actual_rev double                 $WORKING_DIR_PREFIX"d"$bench
+    test_current $actual_rev "std::complex<double>" $WORKING_DIR_PREFIX"c"$bench
+  fi
+  
+done
+
+echo "Float:"
+cat $WORKING_DIR_PREFIX"s""$bench.out"
+echo " "
+
+echo "Double:"
+cat $WORKING_DIR_PREFIX"d""$bench.out"
+echo ""
+
+echo "Complex:"
+cat $WORKING_DIR_PREFIX"c""$bench.out"
+echo ""
+
+if $do_plot ; then
+
+./make_plot.sh $WORKING_DIR_PREFIX"s"$bench $bench $settings_file
+./make_plot.sh $WORKING_DIR_PREFIX"d"$bench $bench $settings_file
+./make_plot.sh $WORKING_DIR_PREFIX"c"$bench $bench $settings_file
+
+fi
--- a/libs/eigen/bench/perf_monitoring/runall.sh
+++ b/libs/eigen/bench/perf_monitoring/runall.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+# ./runall.sh "Title"
+
+# Examples of environment variables to be set:
+#   PREFIX="haswell-fma-"
+#   CXX_FLAGS="-mfma"
+#   CXX=clang++
+
+# Options:
+#   -up : enforce the recomputation of existing data, and keep best results as a merging strategy
+#   -s  : recompute selected changesets only and keep bests
+#   -np : no plotting of results, just generate the data
+
+# Plotting is on unless '-np' occurs somewhere in the argument list.
+do_plot=true
+if [[ "$*" =~ '-np' ]]; then
+  do_plot=false
+fi
+
+# Each entry is "<bench> <settings file>". The entry is expanded unquoted on
+# purpose so that it splits into the two leading arguments of run.sh; all
+# arguments of this script are forwarded after them.
+for job in \
+  "gemm gemm_settings.txt" \
+  "lazy_gemm lazy_gemm_settings.txt" \
+  "gemv gemv_settings.txt" \
+  "gemvt gemv_settings.txt" \
+  "trmv_up gemv_square_settings.txt" \
+  "trmv_lo gemv_square_settings.txt" \
+  "trmv_upt gemv_square_settings.txt" \
+  "trmv_lot gemv_square_settings.txt" \
+  "llt gemm_square_settings.txt"
+do
+  ./run.sh $job $*
+done
+
+if $do_plot ; then
+
+# generate html file
+
+# Appends one table cell that links a benchmark's chart page and shows its
+# thumbnail.
+#   $1 scalar tag (s, d or c)  $2 bench name  $3 tooltip text
+function print_td {
+  echo '<td><a href="'$PREFIX'-'$1"$2"'.html"><img src="'$PREFIX'-'$1"$2"'.png" title="'$3'"></a></td>' >> $htmlfile
+}
+
+# Appends a heading row followed by a row with the float, double and complex
+# cells of one benchmark.
+#   $1 bench name  $2 heading (HTML)
+function print_tr {
+  echo '<tr><th colspan="3">'"$2"'</th></tr>' >> $htmlfile
+  echo '<tr>' >> $htmlfile
+  print_td s $1 float
+  print_td d $1 double
+  print_td c $1 complex
+  echo '</tr>' >> $htmlfile
+}
+
+# The index page is only generated when results live in a PREFIX directory.
+if [ -n "$PREFIX" ]; then
+
+
+cp resources/s1.js $PREFIX/
+cp resources/s2.js $PREFIX/
+
+htmlfile="$PREFIX/index.html"
+cat resources/header.html > $htmlfile
+
+echo '<h1>'$1'</h1>' >> $htmlfile
+echo '<table>' >> $htmlfile
+print_tr gemm       'C += A &middot; B   &nbsp; (gemm)'
+print_tr lazy_gemm  'C += A &middot; B   &nbsp; (gemm lazy)'
+print_tr gemv       'y += A &middot; x   &nbsp; (gemv)'
+print_tr gemvt      'y += A<sup>T</sup> &middot; x  &nbsp; (gemv)'
+print_tr trmv_up    'y += U &middot; x   &nbsp; (trmv)'
+print_tr trmv_upt   'y += U<sup>T</sup> &middot; x  &nbsp; (trmv)'
+print_tr trmv_lo    'y += L &middot; x   &nbsp; (trmv)'
+print_tr trmv_lot   'y += L<sup>T</sup> &middot; x  &nbsp; (trmv)'
+# The Cholesky row must point at the llt benchmark (not trmv_lot), and its
+# <sup> tag must be closed.
+print_tr llt        'L &middot; L<sup>T</sup> = A &nbsp;  (Cholesky,potrf)'
+
+cat resources/footer.html >> $htmlfile
+
+fi
+fi
--- a/libs/eigen/bench/perf_monitoring/trmv_lo.cpp
+++ b/libs/eigen/bench/perf_monitoring/trmv_lo.cpp
@@ -0,0 +1,12 @@
+#include "gemv_common.h"
+
+// Benchmarked kernel: accumulates the product of A's Lower triangular view
+// with B into C through C.noalias(). The expression is the thing being
+// measured, so it must stay exactly as written.
+EIGEN_DONT_INLINE
+void trmv(const Mat &A, const Vec &B, Vec &C)
+{
+  C.noalias() += A.triangularView<Lower>() * B;
+}
+
+// Entry point: the shared driver main_gemv() reads the settings and reports
+// the results; this file only supplies the lower-triangular kernel.
+int main(int argc, char **argv)
+{
+  const int exit_code = main_gemv(argc, argv, trmv);
+  return exit_code;
+}
--- a/libs/eigen/bench/perf_monitoring/trmv_lot.cpp
+++ b/libs/eigen/bench/perf_monitoring/trmv_lot.cpp
@@ -0,0 +1,12 @@
+#include "gemv_common.h"
+
+// Benchmarked kernel: accumulates the product of the Lower triangular view of
+// A.transpose() with C into B through B.noalias(). The output is the second
+// argument here, matching the vector sizes the shared driver passes as
+// f(A,B,C) for a transposed product.
+EIGEN_DONT_INLINE
+void trmv(const Mat &A, Vec &B, const Vec &C)
+{
+  B.noalias() += A.transpose().triangularView<Lower>() * C;
+}
+
+// Entry point: the shared driver main_gemv() reads the settings and reports
+// the results; this file only supplies the transposed lower-triangular kernel.
+int main(int argc, char **argv)
+{
+  const int exit_code = main_gemv(argc, argv, trmv);
+  return exit_code;
+}
--- a/libs/eigen/bench/perf_monitoring/trmv_up.cpp
+++ b/libs/eigen/bench/perf_monitoring/trmv_up.cpp
@@ -0,0 +1,12 @@
+#include "gemv_common.h"
+
+// Benchmarked kernel: accumulates the product of A's Upper triangular view
+// with B into C through C.noalias(). The expression is the thing being
+// measured, so it must stay exactly as written.
+EIGEN_DONT_INLINE
+void trmv(const Mat &A, const Vec &B, Vec &C)
+{
+  C.noalias() += A.triangularView<Upper>() * B;
+}
+
+// Entry point: the shared driver main_gemv() reads the settings and reports
+// the results; this file only supplies the upper-triangular kernel.
+int main(int argc, char **argv)
+{
+  const int exit_code = main_gemv(argc, argv, trmv);
+  return exit_code;
+}
--- a/libs/eigen/bench/perf_monitoring/trmv_upt.cpp
+++ b/libs/eigen/bench/perf_monitoring/trmv_upt.cpp
@@ -0,0 +1,12 @@
+#include "gemv_common.h"
+
+// Benchmarked kernel: accumulates the product of the Upper triangular view of
+// A.transpose() with C into B through B.noalias(). The output is the second
+// argument here, matching the vector sizes the shared driver passes as
+// f(A,B,C) for a transposed product.
+EIGEN_DONT_INLINE
+void trmv(const Mat &A, Vec &B, const Vec &C)
+{
+  B.noalias() += A.transpose().triangularView<Upper>() * C;
+}
+
+// Entry point: the shared driver main_gemv() reads the settings and reports
+// the results; this file only supplies the transposed upper-triangular kernel.
+int main(int argc, char **argv)
+{
+  const int exit_code = main_gemv(argc, argv, trmv);
+  return exit_code;
+}