ADD: new track message, Entity class and Position class
This commit is contained in:
95
libs/eigen/bench/perf_monitoring/changesets.txt
Normal file
95
libs/eigen/bench/perf_monitoring/changesets.txt
Normal file
@@ -0,0 +1,95 @@
|
||||
# Load hg-to-git hash maps from ./eigen_git/.git/
|
||||
#3.0.1
|
||||
#3.1.1
|
||||
#3.2.0
|
||||
3.2.4
|
||||
#574a7621809fe
|
||||
58964a85800bd # introduce AVX
|
||||
#589cbd7e98174 # merge
|
||||
589db7d49efbb # introduce FMA
|
||||
#590a078f442a3 # complex and AVX
|
||||
590a419cea4a0 # improve packing with ptranspose
|
||||
#59251e85c936d # merge
|
||||
#592e497a27ddc
|
||||
593d5a795f673 # New gebp kernel: up to 3 packets x 4 register-level blocks
|
||||
#5942c3c95990d # merge
|
||||
#596c9788d55b9 # Disable 3pX4 kernel on Altivec
|
||||
#5999aa3dc4e21 # merge
|
||||
6209452eb38f8 # before-evaluators
|
||||
#6333eba5e1101 # Implement evaluator for sparse outer products
|
||||
#663b9d314ae19
|
||||
#6655ef95fabee # Properly detect FMA support on ARM
|
||||
#667fe25f3b8e3 # FMA has been wrongly disabled
|
||||
#668409547a0c8
|
||||
#6694304c73542 # merge default to tensors
|
||||
#67216047c8d4a # merge default to tensors
|
||||
#67410a79ca3a3 # merge default to tensors
|
||||
#674b7271dffb5 # Generalized the gebp apis
|
||||
676bfdd9f3ac9 # Made the blocking computation aware of the l3 cache;<br/> Also optimized the blocking parameters to take<br/> into account the number of threads used for a computation.
|
||||
6782dde63499c # generalized gemv
|
||||
6799f98650d0a # ensured that contractions that can be reduced to a matrix vector product
|
||||
#6840918c51e60 # merge tensor
|
||||
684e972b55ec4 # change prefetching in gebp
|
||||
#68598604576d1 # merge index conversion
|
||||
68963eb0f6fe6 # clean blocking size computation
|
||||
689db05f2d01e # rotating kernel for ARM only
|
||||
#6901b7e12847d # result_of
|
||||
69226275b250a # fix prefetching change for ARM
|
||||
692692136350b # prefetching
|
||||
693a8ad8887bf # blocking size strategy
|
||||
693bcf9bb5c1f # avoid redundant pack_rhs
|
||||
6987550107028 # dynamic loop swapping
|
||||
69858740ce4c6 # rm dynamic loop swapping,<br/> adjust lhs's micro panel height to fully exploit L1 cache
|
||||
698cd3bbffa73 # blocking heuristic:<br/> block on the rhs in L1 if the lhs fit in L1.
|
||||
701488c15615a # organize a little our default cache sizes,<br/> and use a saner default L1 outside of x86 (10% faster on Nexus 5)
|
||||
701e56aabf205 # Refactor computeProductBlockingSizes to make room<br/> for the possibility of using lookup tables
|
||||
701ca5c12587b # Polish lookup tables generation
|
||||
7013589a9c115 # actual_panel_rows computation should always be resilient<br/> to parameters not consistent with the known L1 cache size, see comment
|
||||
70102babb9c0f # Provide an empirical lookup table for blocking sizes measured on a Nexus 5.<br/> Only for float, only for Android on ARM 32bit for now.
|
||||
7088481dc21ea # Bug 986: add support for coefficient-based<br/> product with 0 depth.
|
||||
709d7f51feb07 # Bug 992: don't select a 3p GEMM path with non-SIMD scalar types.
|
||||
759f9303cc7c5 # 3.3-alpha1
|
||||
765aba1eda71e # help clang inlining
|
||||
770fe630c9873 # Improve numerical accuracy in LLT and triangular solve<br/> by using true scalar divisions (instead of x * (1/y))
|
||||
#8741d23430628 # Improved the matrix multiplication blocking in the case<br/> where mr is not a power of 2 (e.g on Haswell CPUs)
|
||||
878f629fe95c8 # Made the index type a template parameter to evaluateProductBlockingSizes.<br/> Use numext::mini and numext::maxi instead of <br/> std::min/std::max to compute blocking sizes.
|
||||
8975d51a7f12c # Don't optimize the processing of the last rows of<br/> a matrix matrix product in cases that violate<br/> the assumptions made by the optimized code path.
|
||||
8986136f4fdd4 # Remove the rotating kernel.
|
||||
898e68e165a23 # Bug 256: enable vectorization with unaligned loads/stores.
|
||||
91466e99ab6a1 # Relax mixing-type constraints for binary coeff-wise operators
|
||||
91776236cdea4 # merge
|
||||
917101ea26f5e # Include the cost of stores in unrolling
|
||||
921672076db5d # Fix perf regression introduced in changeset e56aabf205
|
||||
9210fa9e4a15c # Fix perf regression in dgemm introduced by changeset 5d51a7f12c
|
||||
936f6b3cf8de9 # 3.3-beta2
|
||||
944504a4404f1 # Optimize expression matching 'd?=a-b*c' as 'd?=a; d?=b*c;'
|
||||
95877e27fbeee # 3.3-rc1
|
||||
959779774f98c # Bug 1311: fix alignment logic in some cases<br/> of (scalar*small).lazyProduct(small)
|
||||
9729f9d8d2f62 # Disabled part of the matrix matrix peeling code<br/> that's incompatible with 512 bit registers
|
||||
979eeac81b8c0 # 3.3.0
|
||||
989c927af60ed # Fix a performance regression in (mat*mat)*vec<br/> for which mat*mat was evaluated multiple times.
|
||||
994fe696022ec # Operators += and -= do not resize!
|
||||
99466f65ccc36 # Ease compiler generating clean and efficient code in mat*vec
|
||||
9946a5fe86098 # Complete rewrite of column-major-matrix * vector product<br/> to deliver higher performance of modern CPU.
|
||||
99591003f3b86 # Improve performance of row-major-dense-matrix * vector products<br/> for recent CPUs.
|
||||
997eb621413c1 # Revert vec/y to vec*(1/y) in row-major TRSM
|
||||
10444bbc320468 # Bug 1435: fix aliasing issue in expressions like: A = C - B*A;
|
||||
1073624df50945 # Adds missing EIGEN_STRONG_INLINE to support MSVC<br/> properly inlining small vector calculations
|
||||
1094d428a199ab # Bug 1562: optimize evaluation of small products<br/> of the form s*A*B by rewriting them as: s*(A.lazyProduct(B))<br/> to save a costly temporary.<br/> Measured speedup from 2x to 5x.
|
||||
1096de9e31a06d # Introduce the macro ei_declare_local_nested_eval to<br/> help allocating on the stack local temporaries via alloca,<br/> and let outer-products makes a good use of it.
|
||||
11087b91c11207 # Bug 1578: Improve prefetching in matrix multiplication on MIPS.
|
||||
1153aa110e681b # PR 526: Speed up multiplication of small, dynamically sized matrices
|
||||
11544ad359237a # Vectorize row-by-row gebp loop iterations on 16 packets as well
|
||||
1157a476054879 # Bug 1624: improve matrix-matrix product on ARM 64, 20% speedup
|
||||
1160a4159dba08 # do not read buffers out of bounds
|
||||
1163c53eececb0 # Implement AVX512 vectorization of std::complex<float/double>
|
||||
11644e7746fe22 # Bug 1636: fix gemm performance issue with gcc>=6 and no FMA
|
||||
1164956678a4ef # Bug 1515: disable gebp's 3pX4 micro kernel<br/> for MSVC<=19.14 because of register spilling.
|
||||
1165426bce7529 # fix EIGEN_GEBP_2PX4_SPILLING_WORKAROUND<br/> for non vectorized type, and non x86/64 target
|
||||
11660d90637838 # enable spilling workaround on architectures with SSE/AVX
|
||||
1166f159cf3d75 # Artificially increase l1-blocking size for AVX512.<br/> +10% speedup with current kernels.
|
||||
11686dd93f7e3b # Make code compile again for older compilers.
|
||||
1175dbfcceabf5 # Bug: 1633: refactor gebp kernel and optimize for neon
|
||||
117670e133333d # Bug 1661: fix regression in GEBP and AVX512
|
||||
11760f028f61cb # GEBP: cleanup logic to choose between<br/> a 4 packets of 1 packet (=e118ce86fd+fix)
|
||||
1180de77bf5d6c # gebp: Add new ½ and ¼ packet rows per (peeling) round on the lhs
|
||||
12
libs/eigen/bench/perf_monitoring/gemm.cpp
Normal file
12
libs/eigen/bench/perf_monitoring/gemm.cpp
Normal file
@@ -0,0 +1,12 @@
|
||||
#include "gemm_common.h"
|
||||
|
||||
// Benchmarked kernel: accumulates the matrix product A * B into C.
// Kept out-of-line (EIGEN_DONT_INLINE) so the timing loop calls it as a real function.
EIGEN_DONT_INLINE void gemm(const Mat &A, const Mat &B, Mat &C) {
  C.noalias() += A * B;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
return main_gemm(argc, argv, gemm);
|
||||
}
|
||||
67
libs/eigen/bench/perf_monitoring/gemm_common.h
Normal file
67
libs/eigen/bench/perf_monitoring/gemm_common.h
Normal file
@@ -0,0 +1,67 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "eigen_src/Eigen/Core"
|
||||
#include "../BenchTimer.h"
|
||||
using namespace Eigen;
|
||||
|
||||
#ifndef SCALAR
|
||||
#error SCALAR must be defined
|
||||
#endif
|
||||
|
||||
typedef SCALAR Scalar;
|
||||
|
||||
typedef Matrix<Scalar,Dynamic,Dynamic> Mat;
|
||||
|
||||
template<typename Func>
|
||||
EIGEN_DONT_INLINE
|
||||
double bench(long m, long n, long k, const Func& f)
|
||||
{
|
||||
Mat A(m,k);
|
||||
Mat B(k,n);
|
||||
Mat C(m,n);
|
||||
A.setRandom();
|
||||
B.setRandom();
|
||||
C.setZero();
|
||||
|
||||
BenchTimer t;
|
||||
|
||||
double up = 1e8*4/sizeof(Scalar);
|
||||
double tm0 = 4, tm1 = 10;
|
||||
if(NumTraits<Scalar>::IsComplex)
|
||||
{
|
||||
up /= 4;
|
||||
tm0 = 2;
|
||||
tm1 = 4;
|
||||
}
|
||||
|
||||
double flops = 2. * m * n * k;
|
||||
long rep = std::max(1., std::min(100., up/flops) );
|
||||
long tries = std::max(tm0, std::min(tm1, up/flops) );
|
||||
|
||||
BENCH(t, tries, rep, f(A,B,C));
|
||||
|
||||
return 1e-9 * rep * flops / t.best();
|
||||
}
|
||||
|
||||
template<typename Func>
|
||||
int main_gemm(int argc, char **argv, const Func& f)
|
||||
{
|
||||
std::vector<double> results;
|
||||
|
||||
std::string filename = std::string("gemm_settings.txt");
|
||||
if(argc>1)
|
||||
filename = std::string(argv[1]);
|
||||
std::ifstream settings(filename);
|
||||
long m, n, k;
|
||||
while(settings >> m >> n >> k)
|
||||
{
|
||||
//std::cerr << " Testing " << m << " " << n << " " << k << std::endl;
|
||||
results.push_back( bench(m, n, k, f) );
|
||||
}
|
||||
|
||||
std::cout << RowVectorXd::Map(results.data(), results.size());
|
||||
|
||||
return 0;
|
||||
}
|
||||
15
libs/eigen/bench/perf_monitoring/gemm_settings.txt
Normal file
15
libs/eigen/bench/perf_monitoring/gemm_settings.txt
Normal file
@@ -0,0 +1,15 @@
|
||||
8 8 8
|
||||
9 9 9
|
||||
24 24 24
|
||||
239 239 239
|
||||
240 240 240
|
||||
2400 24 24
|
||||
24 2400 24
|
||||
24 24 2400
|
||||
24 2400 2400
|
||||
2400 24 2400
|
||||
2400 2400 24
|
||||
2400 2400 64
|
||||
4800 23 160
|
||||
23 4800 160
|
||||
2400 2400 2400
|
||||
11
libs/eigen/bench/perf_monitoring/gemm_square_settings.txt
Normal file
11
libs/eigen/bench/perf_monitoring/gemm_square_settings.txt
Normal file
@@ -0,0 +1,11 @@
|
||||
8 8 8
|
||||
9 9 9
|
||||
12 12 12
|
||||
15 15 15
|
||||
16 16 16
|
||||
24 24 24
|
||||
102 102 102
|
||||
239 239 239
|
||||
240 240 240
|
||||
2400 2400 2400
|
||||
2463 2463 2463
|
||||
12
libs/eigen/bench/perf_monitoring/gemv.cpp
Normal file
12
libs/eigen/bench/perf_monitoring/gemv.cpp
Normal file
@@ -0,0 +1,12 @@
|
||||
#include "gemv_common.h"
|
||||
|
||||
// Benchmarked kernel: accumulates the matrix-vector product A * B into C.
EIGEN_DONT_INLINE void gemv(const Mat &A, const Vec &B, Vec &C) {
  C.noalias() += A * B;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
return main_gemv(argc, argv, gemv);
|
||||
}
|
||||
69
libs/eigen/bench/perf_monitoring/gemv_common.h
Normal file
69
libs/eigen/bench/perf_monitoring/gemv_common.h
Normal file
@@ -0,0 +1,69 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <functional>
|
||||
#include "eigen_src/Eigen/Core"
|
||||
#include "../BenchTimer.h"
|
||||
using namespace Eigen;
|
||||
|
||||
#ifndef SCALAR
|
||||
#error SCALAR must be defined
|
||||
#endif
|
||||
|
||||
typedef SCALAR Scalar;
|
||||
|
||||
typedef Matrix<Scalar,Dynamic,Dynamic> Mat;
|
||||
typedef Matrix<Scalar,Dynamic,1> Vec;
|
||||
|
||||
template<typename Func>
|
||||
EIGEN_DONT_INLINE
|
||||
double bench(long m, long n, Func &f)
|
||||
{
|
||||
Mat A(m,n);
|
||||
Vec B(n);
|
||||
Vec C(m);
|
||||
A.setRandom();
|
||||
B.setRandom();
|
||||
C.setRandom();
|
||||
|
||||
BenchTimer t;
|
||||
|
||||
double up = 1e8/sizeof(Scalar);
|
||||
double tm0 = 4, tm1 = 10;
|
||||
if(NumTraits<Scalar>::IsComplex)
|
||||
{
|
||||
up /= 4;
|
||||
tm0 = 2;
|
||||
tm1 = 4;
|
||||
}
|
||||
|
||||
double flops = 2. * m * n;
|
||||
long rep = std::max(1., std::min(100., up/flops) );
|
||||
long tries = std::max(tm0, std::min(tm1, up/flops) );
|
||||
|
||||
BENCH(t, tries, rep, f(A,B,C));
|
||||
|
||||
return 1e-9 * rep * flops / t.best();
|
||||
}
|
||||
|
||||
template<typename Func>
|
||||
int main_gemv(int argc, char **argv, Func& f)
|
||||
{
|
||||
std::vector<double> results;
|
||||
|
||||
std::string filename = std::string("gemv_settings.txt");
|
||||
if(argc>1)
|
||||
filename = std::string(argv[1]);
|
||||
std::ifstream settings(filename);
|
||||
long m, n;
|
||||
while(settings >> m >> n)
|
||||
{
|
||||
//std::cerr << " Testing " << m << " " << n << std::endl;
|
||||
results.push_back( bench(m, n, f) );
|
||||
}
|
||||
|
||||
std::cout << RowVectorXd::Map(results.data(), results.size());
|
||||
|
||||
return 0;
|
||||
}
|
||||
11
libs/eigen/bench/perf_monitoring/gemv_settings.txt
Normal file
11
libs/eigen/bench/perf_monitoring/gemv_settings.txt
Normal file
@@ -0,0 +1,11 @@
|
||||
8 8
|
||||
9 9
|
||||
24 24
|
||||
239 239
|
||||
240 240
|
||||
2400 24
|
||||
24 2400
|
||||
24 240
|
||||
2400 2400
|
||||
4800 23
|
||||
23 4800
|
||||
13
libs/eigen/bench/perf_monitoring/gemv_square_settings.txt
Normal file
13
libs/eigen/bench/perf_monitoring/gemv_square_settings.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
8 8
|
||||
9 9
|
||||
12 12
|
||||
15 15
|
||||
16 16
|
||||
24 24
|
||||
53 53
|
||||
74 74
|
||||
102 102
|
||||
239 239
|
||||
240 240
|
||||
2400 2400
|
||||
2463 2463
|
||||
12
libs/eigen/bench/perf_monitoring/gemvt.cpp
Normal file
12
libs/eigen/bench/perf_monitoring/gemvt.cpp
Normal file
@@ -0,0 +1,12 @@
|
||||
#include "gemv_common.h"
|
||||
|
||||
// Benchmarked kernel: accumulates the transposed product A^T * C into B.
// The driver calls this as f(A, B, C), so here the second argument is the output.
EIGEN_DONT_INLINE void gemv(const Mat &A, Vec &B, const Vec &C) {
  B.noalias() += A.transpose() * C;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
return main_gemv(argc, argv, gemv);
|
||||
}
|
||||
101
libs/eigen/bench/perf_monitoring/lazy_gemm.cpp
Normal file
101
libs/eigen/bench/perf_monitoring/lazy_gemm.cpp
Normal file
@@ -0,0 +1,101 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <Eigen/Core>
|
||||
#include "../../BenchTimer.h"
|
||||
using namespace Eigen;
|
||||
|
||||
#ifndef SCALAR
|
||||
#error SCALAR must be defined
|
||||
#endif
|
||||
|
||||
typedef SCALAR Scalar;
|
||||
|
||||
template<typename MatA, typename MatB, typename MatC>
|
||||
EIGEN_DONT_INLINE
|
||||
void lazy_gemm(const MatA &A, const MatB &B, MatC &C)
|
||||
{
|
||||
// escape((void*)A.data());
|
||||
// escape((void*)B.data());
|
||||
C.noalias() += A.lazyProduct(B);
|
||||
// escape((void*)C.data());
|
||||
}
|
||||
|
||||
template<int m, int n, int k, int TA>
|
||||
EIGEN_DONT_INLINE
|
||||
double bench()
|
||||
{
|
||||
typedef Matrix<Scalar,m,k,TA> MatA;
|
||||
typedef Matrix<Scalar,k,n> MatB;
|
||||
typedef Matrix<Scalar,m,n> MatC;
|
||||
|
||||
MatA A(m,k);
|
||||
MatB B(k,n);
|
||||
MatC C(m,n);
|
||||
A.setRandom();
|
||||
B.setRandom();
|
||||
C.setZero();
|
||||
|
||||
BenchTimer t;
|
||||
|
||||
double up = 1e7*4/sizeof(Scalar);
|
||||
double tm0 = 10, tm1 = 20;
|
||||
|
||||
double flops = 2. * m * n * k;
|
||||
long rep = std::max(10., std::min(10000., up/flops) );
|
||||
long tries = std::max(tm0, std::min(tm1, up/flops) );
|
||||
|
||||
BENCH(t, tries, rep, lazy_gemm(A,B,C));
|
||||
|
||||
return 1e-9 * rep * flops / t.best();
|
||||
}
|
||||
|
||||
template<int m, int n, int k>
|
||||
double bench_t(int t)
|
||||
{
|
||||
if(t)
|
||||
return bench<m,n,k,RowMajor>();
|
||||
else
|
||||
return bench<m,n,k,0>();
|
||||
}
|
||||
|
||||
// Maps run-time sizes onto the compile-time instantiations of bench_t.
// Only square products with m == n == k in 1..12 are instantiated; any other
// combination returns 0 without running a benchmark.
//   m, n, k : requested problem size
//   t       : non-zero selects a RowMajor left operand (see bench_t)
EIGEN_DONT_INLINE
double bench_mnk(int m, int n, int k, int t)
{
  // Dispatch on the common size directly. The former key m*10000 + n*100 + k
  // was not unique for sizes >= 100 and could overflow int for large m.
  if(m != n || n != k)
    return 0;

  switch(m) {
    case  1 : return bench_t< 1, 1, 1>(t);
    case  2 : return bench_t< 2, 2, 2>(t);
    case  3 : return bench_t< 3, 3, 3>(t);
    case  4 : return bench_t< 4, 4, 4>(t);
    case  5 : return bench_t< 5, 5, 5>(t);
    case  6 : return bench_t< 6, 6, 6>(t);
    case  7 : return bench_t< 7, 7, 7>(t);
    case  8 : return bench_t< 8, 8, 8>(t);
    case  9 : return bench_t< 9, 9, 9>(t);
    case 10 : return bench_t<10,10,10>(t);
    case 11 : return bench_t<11,11,11>(t);
    case 12 : return bench_t<12,12,12>(t);
    default : return 0;
  }
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
std::vector<double> results;
|
||||
|
||||
std::string filename = std::string("lazy_gemm_settings.txt");
|
||||
if(argc>1)
|
||||
filename = std::string(argv[1]);
|
||||
std::ifstream settings(filename);
|
||||
long m, n, k, t;
|
||||
while(settings >> m >> n >> k >> t)
|
||||
{
|
||||
//std::cerr << " Testing " << m << " " << n << " " << k << std::endl;
|
||||
results.push_back( bench_mnk(m, n, k, t) );
|
||||
}
|
||||
|
||||
std::cout << RowVectorXd::Map(results.data(), results.size());
|
||||
|
||||
return 0;
|
||||
}
|
||||
15
libs/eigen/bench/perf_monitoring/lazy_gemm_settings.txt
Normal file
15
libs/eigen/bench/perf_monitoring/lazy_gemm_settings.txt
Normal file
@@ -0,0 +1,15 @@
|
||||
1 1 1 0
|
||||
2 2 2 0
|
||||
3 3 3 0
|
||||
4 4 4 0
|
||||
4 4 4 1
|
||||
5 5 5 0
|
||||
6 6 6 0
|
||||
7 7 7 0
|
||||
7 7 7 1
|
||||
8 8 8 0
|
||||
9 9 9 0
|
||||
10 10 10 0
|
||||
11 11 11 0
|
||||
12 12 12 0
|
||||
12 12 12 1
|
||||
15
libs/eigen/bench/perf_monitoring/llt.cpp
Normal file
15
libs/eigen/bench/perf_monitoring/llt.cpp
Normal file
@@ -0,0 +1,15 @@
|
||||
#include "gemm_common.h"
|
||||
#include <Eigen/Cholesky>
|
||||
|
||||
// Benchmarked kernel: in-place blocked Cholesky factorization of a copy of A.
// B is unused; the parameter exists because the driver calls f(A, B, C).
EIGEN_DONT_INLINE void llt(const Mat &A, const Mat &B, Mat &C) {
  C = A;
  // Large diagonal shift -- presumably to make the random matrix positive definite.
  C.diagonal().array() += 1000;
  Eigen::internal::llt_inplace<Mat::Scalar, Lower>::blocked(C);
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
return main_gemm(argc, argv, llt);
|
||||
}
|
||||
112
libs/eigen/bench/perf_monitoring/make_plot.sh
Executable file
112
libs/eigen/bench/perf_monitoring/make_plot.sh
Executable file
@@ -0,0 +1,112 @@
|
||||
#!/bin/bash

# Usage: make_plot.sh <WHAT> <bench> <settings_file>
#   WHAT          base name of the bench: reads $WHAT.out and generates
#                 $WHAT.pdf, $WHAT.png and $WHAT.html
#   bench         benchmark name (not used by this script)
#   settings_file one problem size per line; each non-empty line is one curve
WHAT=$1
bench=$2
settings_file=$3

# Header row for gnuplot: "rev" followed by one quoted label per non-empty
# settings line. ncurves counts exactly the labels that were emitted.
header="rev "
ncurves=0
while read -r line
do
  if [ -n "$line" ]; then
    header="$header \"$line\""
    ((ncurves++))
  fi
done < "$settings_file"

echo $header > "$WHAT.out.header"
cat "$WHAT.out" >> "$WHAT.out.header"


echo "set title '$WHAT'" > "$WHAT.gnuplot"
echo "set key autotitle columnhead outside " >> "$WHAT.gnuplot"
echo "set xtics rotate 1" >> "$WHAT.gnuplot"

echo "set term pdf color rounded enhanced fontscale 0.35 size 7in,5in" >> "$WHAT.gnuplot"
echo "set output '$WHAT.pdf'" >> "$WHAT.gnuplot"

# Column 1 is the revision, columns 2..ncurves+1 are the curves.
echo "plot for [col=2:$ncurves+1] '$WHAT.out.header' using 0:col:xticlabels(1) with lines" >> "$WHAT.gnuplot"
echo " " >> "$WHAT.gnuplot"

gnuplot -persist < "$WHAT.gnuplot"

# generate a png file (thumbnail)
convert -colors 256 -background white -density 300 -resize 300 -quality 0 "$WHAT.pdf" -background white -flatten "$WHAT.png"

# clean
rm "$WHAT.out.header" "$WHAT.gnuplot"


# generate html/svg graph

cat resources/chart_header.html > "$WHAT.html"
echo 'var customSettings = {"TITLE":"","SUBTITLE":"","XLABEL":"","YLABEL":""};' >> "$WHAT.html"
#  'data' is an array of datasets (i.e. curves), each of which is an object of the form
#  {
#    key: <name of the curve>,
#    color: <optional color of the curve>,
#    values: [{
#        r: <revision number>,
#        v: <GFlops>
#    }]
#  }
echo 'var data = [' >> "$WHAT.html"

# One dataset per non-empty settings line; col is the matching column of
# $WHAT.out and only advances when a dataset is emitted.
col=2
while read -r line
do
  if [ -n "$line" ]; then
    echo '{"key":"'$line'","values":[' >> "$WHAT.html"
    # i indexes the non-empty lines of $WHAT.out; the changeset arrays below
    # are built with the same rule so that the indices stay aligned.
    i=0
    while read -r line2
    do
      if [ -n "$line2" ]; then
        val=`echo $line2 | cut -s -f $col -d ' '`
        if [ -n "$val" ]; then # skip build failures
          echo '{"r":'$i',"v":'$val'},' >> "$WHAT.html"
        fi
        ((i++))
      fi
    done < "$WHAT.out"
    echo ']},' >> "$WHAT.html"
    ((col++))
  fi
done < "$settings_file"
echo '];' >> "$WHAT.html"

# Revision identifier (first field) of every result line.
echo 'var changesets = [' >> "$WHAT.html"
while read -r line2
do
  if [ -n "$line2" ]; then
    echo '"'`echo $line2 | cut -f 1 -d ' '`'",' >> "$WHAT.html"
  fi
done < "$WHAT.out"
echo '];' >> "$WHAT.html"

# Comment attached to each revision in changesets.txt (text after '#').
echo 'var changesets_details = [' >> "$WHAT.html"
while read -r line2
do
  if [ -n "$line2" ]; then
    num=`echo "$line2" | cut -f 1 -d ' '`
    comment=`grep ":$num" changesets.txt | cut -f 2 -d '#'`
    echo '"'"$comment"'",' >> "$WHAT.html"
  fi
done < "$WHAT.out"
echo '];' >> "$WHAT.html"

# Tick positions: 0..N-1, one per result line.
echo 'var changesets_count = [' >> "$WHAT.html"
i=0
while read -r line2
do
  if [ -n "$line2" ]; then
    echo $i ',' >> "$WHAT.html"
    ((i++))
  fi
done < "$WHAT.out"
echo '];' >> "$WHAT.html"

cat resources/chart_footer.html >> "$WHAT.html"
|
||||
41
libs/eigen/bench/perf_monitoring/resources/chart_footer.html
Normal file
41
libs/eigen/bench/perf_monitoring/resources/chart_footer.html
Normal file
@@ -0,0 +1,41 @@
|
||||
/* Build the line chart; both axes are forced to include 0. */
var chart = nv.models.lineChart()
  .color(d3.scale.category10().range())
  .margin({left: 75, bottom: 100})
  .forceX([0])
  .forceY([0]);

/* x axis: index of the changeset, labelled with the changeset identifier */
function revisionOf(datum) { return datum.r; }
function changesetLabel(d) { return changesets[d]; }

chart.x(revisionOf)
  .xAxis.options({
    axisLabel: customSettings.XLABEL || 'Changeset',
    tickFormat: d3.format('.0f')
  });
chart.xAxis
  .tickValues(changesets_count)
  .tickFormat(changesetLabel)
  .rotateLabels(-90);

/* y axis: measured value of the data point */
function valueOf(datum) { return datum.v; }

chart.y(valueOf)
  .yAxis.options({
    axisLabel: customSettings.YLABEL || 'GFlops'/*,
    tickFormat: function(val){ return d3.format('.0f')(val) + ' GFlops'; }*/
  });

/* tooltip header: changeset identifier followed by its description */
chart.tooltip.headerFormatter(function(d) {
  return changesets[d]
    + ' <p style="font-weight:normal;text-align: left;">'
    + changesets_details[d] + "</p>";
});

//chart.useInteractiveGuideline(true);
d3.select('#chart').datum(data).call(chart);
var plot = d3.select('#chart > g');

/* title, centered at the top of the plot */
plot.append('text')
  .style('font-size', '24px')
  .attr('text-anchor', 'middle')
  .attr('x', '50%')
  .attr('y', '20px')
  .text(customSettings.TITLE || '');

/* redraw when the window is resized */
nv.utils.windowResize(chart.update);
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
45
libs/eigen/bench/perf_monitoring/resources/chart_header.html
Normal file
45
libs/eigen/bench/perf_monitoring/resources/chart_header.html
Normal file
File diff suppressed because one or more lines are too long
3
libs/eigen/bench/perf_monitoring/resources/footer.html
Normal file
3
libs/eigen/bench/perf_monitoring/resources/footer.html
Normal file
@@ -0,0 +1,3 @@
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
42
libs/eigen/bench/perf_monitoring/resources/header.html
Normal file
42
libs/eigen/bench/perf_monitoring/resources/header.html
Normal file
@@ -0,0 +1,42 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<title>Eigen performance monitoring</title>
|
||||
<style type="text/css">
|
||||
|
||||
body
|
||||
{
|
||||
background:#fff;
|
||||
}
|
||||
th {
|
||||
|
||||
}
|
||||
img
|
||||
{
|
||||
width:auto;
|
||||
box-shadow:0px 0px 20px #cecece;
|
||||
margin: 20px 20px 20px 20px;
|
||||
-moz-transform: scale(1);
|
||||
-moz-transition-duration: 0.4s;
|
||||
-webkit-transition-duration: 0.4s;
|
||||
-webkit-transform: scale(1);
|
||||
|
||||
-ms-transform: scale(1);
|
||||
-ms-transition-duration: 0.4s;
|
||||
}
|
||||
img:hover
|
||||
{
|
||||
box-shadow: 5px 5px 20px #dcdcdc;
|
||||
-moz-transform: scale(1.1);
|
||||
-moz-transition-duration: 0.4s;
|
||||
-webkit-transition-duration: 0.4s;
|
||||
-webkit-transform: scale(1.1);
|
||||
|
||||
-ms-transform: scale(1.1);
|
||||
-ms-transition-duration: 0.4s;
|
||||
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
1
libs/eigen/bench/perf_monitoring/resources/s1.js
Normal file
1
libs/eigen/bench/perf_monitoring/resources/s1.js
Normal file
File diff suppressed because one or more lines are too long
1
libs/eigen/bench/perf_monitoring/resources/s2.js
Normal file
1
libs/eigen/bench/perf_monitoring/resources/s2.js
Normal file
File diff suppressed because one or more lines are too long
183
libs/eigen/bench/perf_monitoring/run.sh
Executable file
183
libs/eigen/bench/perf_monitoring/run.sh
Executable file
@@ -0,0 +1,183 @@
|
||||
#!/bin/bash
|
||||
|
||||
# ./run.sh gemm gemm_settings.txt
|
||||
# ./run.sh lazy_gemm lazy_gemm_settings.txt
|
||||
# ./run.sh gemv gemv_settings.txt
|
||||
# ./run.sh trmv_up gemv_square_settings.txt
|
||||
# ...
|
||||
|
||||
# Examples of environment variables to be set:
|
||||
# PREFIX="haswell-fma-"
|
||||
# CXX_FLAGS="-mfma"
|
||||
# CXX=clang++
|
||||
|
||||
# Options:
|
||||
# -up : enforce the recomputation of existing data, and keep best results as a merging strategy
|
||||
# -s : recompute selected changesets only and keep bests
|
||||
# -np : no plotting of results, just generate the data
|
||||
|
||||
# Positional arguments: benchmark name and settings file.
bench=$1
settings_file=$2

# Options are detected by searching the whole argument string for the flag
# text, so they may appear anywhere on the command line.
case "$*" in
  *-up*) update=true ;;
  *)     update=false ;;
esac

case "$*" in
  *-s*) selected=true ;;
  *)    selected=false ;;
esac

case "$*" in
  *-np*) do_plot=false ;;
  *)     do_plot=true ;;
esac
|
||||
|
||||
|
||||
# Results go to $PREFIX/ with files named "$PREFIX-<name>", or to default/
# with unprefixed file names when PREFIX is unset or empty.
# (":-" substitutes the default; the former ":?" aborted the script instead.)
WORKING_DIR=${PREFIX:-default}

if [ -z "$PREFIX" ]; then
  WORKING_DIR_PREFIX="$WORKING_DIR/"
else
  WORKING_DIR_PREFIX="$WORKING_DIR/$PREFIX-"
fi
echo "WORKING_DIR_PREFIX=$WORKING_DIR_PREFIX"
mkdir -p "$WORKING_DIR"
|
||||
|
||||
# Keep the full argument string; test_current searches it for revision ids.
global_args="$*"

# Report which recomputation strategy is active.
if $selected ; then
  mode_message="Recompute selected changesets only and keep bests"
elif $update ; then
  mode_message="(Re-)Compute all changesets and keep bests"
else
  mode_message="Skip previously computed changesets"
fi
echo "$mode_message"

# Fetch the Eigen sources on first use, otherwise refresh the existing clone.
if [ -d "eigen_src" ]; then
  ( cd eigen_src; git pull )
else
  git clone https://gitlab.com/libeigen/eigen.git eigen_src
fi

# Default compiler when CXX is unset or empty.
: "${CXX:=g++}"
|
||||
|
||||
# make_backup <base>
# Moves <base>.out to <base>.backup when a previous result file exists.
function make_backup
{
  if [ ! -f "$1.out" ]; then
    return 0
  fi
  mv "$1.out" "$1.backup"
}
|
||||
|
||||
# merge <new> <old>
# Both arguments are whitespace-separated lists of numbers. When they have the
# same number of entries, prints the element-wise maximum (computed with bc);
# otherwise prints <new> unchanged.
function merge
{
  count1=$(echo $1 | wc -w)
  count2=$(echo $2 | wc -w)

  if [ $count1 != $count2 ]; then
    echo $1
    return
  fi

  a=( $1 ); b=( $2 )
  res=""
  i=0
  while [ $i -lt $count1 ]; do
    ai=${a[$i]}; bi=${b[$i]}
    tmp=$(echo "if ($ai > $bi) $ai else $bi " | bc -l)
    res="$res $tmp"
    i=$((i+1))
  done
  echo $res
}
|
||||
|
||||
# test_current <rev> <scalar> <name>
# Builds and runs $bench.cpp for the checked-out revision with SCALAR=<scalar>
# and appends "<rev> <results>" to <name>.out.
# Results found in <name>.backup are reused when they are complete, unless a
# recomputation is requested; recomputed results are merged with previous ones.
function test_current
{
  rev=$1
  scalar=$2
  name=$3

  # Previous results for this revision, if any.
  prev=""
  if [ -e "$name.backup" ]; then
    prev=$(grep $rev "$name.backup" | cut -d ' ' -f 2-)
  fi
  res=$prev

  # Previous results are complete when there is one value per settings line.
  count_rev=$(echo $prev | wc -w)
  count_ref=$(cat $settings_file | wc -l)

  # Was this revision named on the command line?
  if echo "$global_args" | grep "$rev" > /dev/null; then
    rev_found=true
  else
    rev_found=false
  fi

  if ! { $update || [ $count_rev != $count_ref ] || ( $selected && $rev_found ); }; then
    echo "Skip existing results for $rev / $name"
    echo "$rev $res" >> $name.out
  else
    echo "RUN: $CXX -O3 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src $bench.cpp -DSCALAR=$scalar -o $name"
    if $CXX -O3 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src $bench.cpp -DSCALAR=$scalar -o $name; then
      curr=$(./$name $settings_file)
      if [ $count_rev == $count_ref ]; then
        echo "merge previous $prev"
        echo "with new $curr"
      else
        echo "got $curr"
      fi
      res=$(merge "$curr" "$prev")
      echo "$rev $res" >> $name.out
    else
      # Nothing is recorded for a revision that does not compile.
      echo "Compilation failed, skip rev $rev"
    fi
  fi
}
|
||||
|
||||
# Keep the previous results (float, double, complex) for reuse and merging.
make_backup $WORKING_DIR_PREFIX"s"$bench
make_backup $WORKING_DIR_PREFIX"d"$bench
make_backup $WORKING_DIR_PREFIX"c"$bench

# Benchmark every active entry of changesets.txt; text after '#' is a comment.
cut -f1 -d"#" < changesets.txt | grep -E '[[:alnum:]]' | while read rev
do
  if [ -n "$rev" ]; then
    # Entries may be written as "<number>:<hash>"; check out the part after ':'.
    rev2=`echo $rev | cut -f 2 -d':'`
    echo "Testing rev $rev, $rev2"

    # Skip entries that cannot be checked out: otherwise the previously
    # checked-out revision would be benchmarked again under this entry.
    if ! ( cd eigen_src && git checkout "$rev2" > /dev/null ); then
      echo "Checkout of $rev2 failed, skip rev $rev"
      continue
    fi
    actual_rev=`cd eigen_src && git rev-parse --short HEAD`

    test_current $actual_rev float                  $WORKING_DIR_PREFIX"s"$bench
    test_current $actual_rev double                 $WORKING_DIR_PREFIX"d"$bench
    test_current $actual_rev "std::complex<double>" $WORKING_DIR_PREFIX"c"$bench
  fi

done

echo "Float:"
cat $WORKING_DIR_PREFIX"s""$bench.out"
echo " "

echo "Double:"
cat $WORKING_DIR_PREFIX"d""$bench.out"
echo ""

echo "Complex:"
cat $WORKING_DIR_PREFIX"c""$bench.out"
echo ""

if $do_plot ; then

./make_plot.sh $WORKING_DIR_PREFIX"s"$bench $bench $settings_file
./make_plot.sh $WORKING_DIR_PREFIX"d"$bench $bench $settings_file
./make_plot.sh $WORKING_DIR_PREFIX"c"$bench $bench $settings_file

fi
|
||||
72
libs/eigen/bench/perf_monitoring/runall.sh
Executable file
72
libs/eigen/bench/perf_monitoring/runall.sh
Executable file
@@ -0,0 +1,72 @@
|
||||
#!/bin/bash
|
||||
|
||||
# ./runall.sh "Title"
|
||||
|
||||
# Examples of environment variables to be set:
|
||||
# PREFIX="haswell-fma-"
|
||||
# CXX_FLAGS="-mfma"
|
||||
# CXX=clang++
|
||||
|
||||
# Options:
|
||||
# -up : enforce the recomputation of existing data, and keep best results as a merging strategy
|
||||
# -s : recompute selected changesets only and keep bests
|
||||
# -np : no plotting of results, just generate the data
|
||||
|
||||
# -np anywhere in the argument string disables the HTML index generation below.
case "$*" in
  *-np*) do_plot=false ;;
  *)     do_plot=true ;;
esac

# Each entry is "<benchmark> <settings file>"; all arguments of this script
# are forwarded to run.sh after them.
for job in \
  "gemm gemm_settings.txt" \
  "lazy_gemm lazy_gemm_settings.txt" \
  "gemv gemv_settings.txt" \
  "gemvt gemv_settings.txt" \
  "trmv_up gemv_square_settings.txt" \
  "trmv_lo gemv_square_settings.txt" \
  "trmv_upt gemv_square_settings.txt" \
  "trmv_lot gemv_square_settings.txt" \
  "llt gemm_square_settings.txt"
do
  ./run.sh $job $*
done
|
||||
|
||||
if $do_plot ; then

# generate html file

# print_td <scalar letter> <benchmark> <tooltip>
# Appends one table cell linking the thumbnail to the interactive chart.
function print_td {
  echo '<td><a href="'$PREFIX'-'$1"$2"'.html"><img src="'$PREFIX'-'$1"$2"'.png" title="'$3'"></a></td>' >> $htmlfile
}

# print_tr <benchmark> <heading>
# Appends a heading row followed by the float/double/complex cells.
function print_tr {
  echo '<tr><th colspan="3">'"$2"'</th></tr>' >> $htmlfile
  echo '<tr>' >> $htmlfile
  print_td s $1 float
  print_td d $1 double
  print_td c $1 complex
  echo '</tr>' >> $htmlfile
}

# The index is only generated when results live in a PREFIX directory.
if [ -n "$PREFIX" ]; then


cp resources/s1.js $PREFIX/
cp resources/s2.js $PREFIX/

htmlfile="$PREFIX/index.html"
cat resources/header.html > $htmlfile

echo '<h1>'$1'</h1>' >> $htmlfile
echo '<table>' >> $htmlfile
print_tr gemm      'C += A · B   (gemm)'
print_tr lazy_gemm 'C += A · B   (gemm lazy)'
print_tr gemv      'y += A · x   (gemv)'
print_tr gemvt     'y += A<sup>T</sup> · x   (gemv)'
print_tr trmv_up   'y += U · x   (trmv)'
print_tr trmv_upt  'y += U<sup>T</sup> · x   (trmv)'
print_tr trmv_lo   'y += L · x   (trmv)'
print_tr trmv_lot  'y += L<sup>T</sup> · x   (trmv)'
# The Cholesky row shows the llt benchmark (it used to repeat the trmv_lot charts).
print_tr llt       'L · L<sup>T</sup> = A   (Cholesky,potrf)'

cat resources/footer.html >> $htmlfile

fi
fi
|
||||
12
libs/eigen/bench/perf_monitoring/trmv_lo.cpp
Normal file
12
libs/eigen/bench/perf_monitoring/trmv_lo.cpp
Normal file
@@ -0,0 +1,12 @@
|
||||
#include "gemv_common.h"
|
||||
|
||||
// Benchmarked kernel: accumulates (lower triangular part of A) * B into C.
EIGEN_DONT_INLINE void trmv(const Mat &A, const Vec &B, Vec &C) {
  C.noalias() += A.triangularView<Lower>() * B;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
return main_gemv(argc, argv, trmv);
|
||||
}
|
||||
12
libs/eigen/bench/perf_monitoring/trmv_lot.cpp
Normal file
12
libs/eigen/bench/perf_monitoring/trmv_lot.cpp
Normal file
@@ -0,0 +1,12 @@
|
||||
#include "gemv_common.h"
|
||||
|
||||
// Benchmarked kernel: accumulates (lower triangular part of A^T) * C into B.
// The driver calls this as f(A, B, C), so here the second argument is the output.
EIGEN_DONT_INLINE void trmv(const Mat &A, Vec &B, const Vec &C) {
  B.noalias() += A.transpose().triangularView<Lower>() * C;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
return main_gemv(argc, argv, trmv);
|
||||
}
|
||||
12
libs/eigen/bench/perf_monitoring/trmv_up.cpp
Normal file
12
libs/eigen/bench/perf_monitoring/trmv_up.cpp
Normal file
@@ -0,0 +1,12 @@
|
||||
#include "gemv_common.h"
|
||||
|
||||
// Benchmarked kernel: accumulates (upper triangular part of A) * B into C.
EIGEN_DONT_INLINE void trmv(const Mat &A, const Vec &B, Vec &C) {
  C.noalias() += A.triangularView<Upper>() * B;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
return main_gemv(argc, argv, trmv);
|
||||
}
|
||||
12
libs/eigen/bench/perf_monitoring/trmv_upt.cpp
Normal file
12
libs/eigen/bench/perf_monitoring/trmv_upt.cpp
Normal file
@@ -0,0 +1,12 @@
|
||||
#include "gemv_common.h"
|
||||
|
||||
// Benchmarked kernel: accumulates (upper triangular part of A^T) * C into B.
// The driver calls this as f(A, B, C), so here the second argument is the output.
EIGEN_DONT_INLINE void trmv(const Mat &A, Vec &B, const Vec &C) {
  B.noalias() += A.transpose().triangularView<Upper>() * C;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
return main_gemv(argc, argv, trmv);
|
||||
}
|
||||
Reference in New Issue
Block a user