ADD: new track message, Entity class and Position class

This commit is contained in:
Henry Winkel
2022-12-20 17:20:35 +01:00
parent 469ecfb099
commit 98ebb563a8
2114 changed files with 482360 additions and 24 deletions

View File

@@ -0,0 +1,95 @@
Load hg-to-git hash maps from ./eigen_git/.git/
#3.0.1
#3.1.1
#3.2.0
3.2.4
#574a7621809fe
58964a85800bd # introduce AVX
#589cbd7e98174 # merge
589db7d49efbb # introduce FMA
#590a078f442a3 # complex and AVX
590a419cea4a0 # improve packing with ptranspose
#59251e85c936d # merge
#592e497a27ddc
593d5a795f673 # New gebp kernel: up to 3 packets x 4 register-level blocks
#5942c3c95990d # merge
#596c9788d55b9 # Disable 3pX4 kernel on Altivec
#5999aa3dc4e21 # merge
6209452eb38f8 # before-evaluators
#6333eba5e1101 # Implement evaluator for sparse outer products
#663b9d314ae19
#6655ef95fabee # Properly detect FMA support on ARM
#667fe25f3b8e3 # FMA has been wrongly disabled
#668409547a0c8
#6694304c73542 # merge default to tensors
#67216047c8d4a # merge default to tensors
#67410a79ca3a3 # merge default to tensors
#674b7271dffb5 # Generalized the gebp apis
676bfdd9f3ac9 # Made the blocking computation aware of the l3 cache;<br/> Also optimized the blocking parameters to take<br/> into account the number of threads used for a computation.
6782dde63499c # generalized gemv
6799f98650d0a # ensured that contractions that can be reduced to a matrix vector product
#6840918c51e60 # merge tensor
684e972b55ec4 # change prefetching in gebp
#68598604576d1 # merge index conversion
68963eb0f6fe6 # clean blocking size computation
689db05f2d01e # rotating kernel for ARM only
#6901b7e12847d # result_of
69226275b250a # fix prefetching change for ARM
692692136350b # prefetching
693a8ad8887bf # blocking size strategy
693bcf9bb5c1f # avoid redundant pack_rhs
6987550107028 # dynamic loop swapping
69858740ce4c6 # rm dynamic loop swapping,<br/> adjust lhs's micro panel height to fully exploit L1 cache
698cd3bbffa73 # blocking heuristic:<br/> block on the rhs in L1 if the lhs fit in L1.
701488c15615a # organize a little our default cache sizes,<br/> and use a saner default L1 outside of x86 (10% faster on Nexus 5)
701e56aabf205 # Refactor computeProductBlockingSizes to make room<br/> for the possibility of using lookup tables
701ca5c12587b # Polish lookup tables generation
7013589a9c115 # actual_panel_rows computation should always be resilient<br/> to parameters not consistent with the known L1 cache size, see comment
70102babb9c0f # Provide a empirical lookup table for blocking sizes measured on a Nexus 5.<br/> Only for float, only for Android on ARM 32bit for now.
7088481dc21ea # Bug 986: add support for coefficient-based<br/> product with 0 depth.
709d7f51feb07 # Bug 992: don't select a 3p GEMM path with non-SIMD scalar types.
759f9303cc7c5 # 3.3-alpha1
765aba1eda71e # help clang inlining
770fe630c9873 # Improve numerical accuracy in LLT and triangular solve<br/> by using true scalar divisions (instead of x * (1/y))
#8741d23430628 # Improved the matrix multiplication blocking in the case<br/> where mr is not a power of 2 (e.g on Haswell CPUs)
878f629fe95c8 # Made the index type a template parameter to evaluateProductBlockingSizes.<br/> Use numext::mini and numext::maxi instead of <br/> std::min/std::max to compute blocking sizes.
8975d51a7f12c # Don't optimize the processing of the last rows of<br/> a matrix matrix product in cases that violate<br/> the assumptions made by the optimized code path.
8986136f4fdd4 # Remove the rotating kernel.
898e68e165a23 # Bug 256: enable vectorization with unaligned loads/stores.
91466e99ab6a1 # Relax mixing-type constraints for binary coeff-wise operators
91776236cdea4 # merge
917101ea26f5e # Include the cost of stores in unrolling
921672076db5d # Fix perf regression introduced in changeset e56aabf205
9210fa9e4a15c # Fix perf regression in dgemm introduced by changeset 5d51a7f12c
936f6b3cf8de9 # 3.3-beta2
944504a4404f1 # Optimize expression matching 'd?=a-b*c' as 'd?=a; d?=b*c;'
95877e27fbeee # 3.3-rc1
959779774f98c # Bug 1311: fix alignment logic in some cases<br/> of (scalar*small).lazyProduct(small)
9729f9d8d2f62 # Disabled part of the matrix matrix peeling code<br/> that's incompatible with 512 bit registers
979eeac81b8c0 # 3.3.0
989c927af60ed # Fix a performance regression in (mat*mat)*vec<br/> for which mat*mat was evaluated multiple times.
994fe696022ec # Operators += and -= do not resize!
99466f65ccc36 # Ease compiler generating clean and efficient code in mat*vec
9946a5fe86098 # Complete rewrite of column-major-matrix * vector product<br/> to deliver higher performance of modern CPU.
99591003f3b86 # Improve performance of row-major-dense-matrix * vector products<br/> for recent CPUs.
997eb621413c1 # Revert vec/y to vec*(1/y) in row-major TRSM
10444bbc320468 # Bug 1435: fix aliasing issue in expressions like: A = C - B*A;
1073624df50945 # Adds missing EIGEN_STRONG_INLINE to support MSVC<br/> properly inlining small vector calculations
1094d428a199ab # Bug 1562: optimize evaluation of small products<br/> of the form s*A*B by rewriting them as: s*(A.lazyProduct(B))<br/> to save a costly temporary.<br/> Measured speedup from 2x to 5x.
1096de9e31a06d # Introduce the macro ei_declare_local_nested_eval to<br/> help allocating on the stack local temporaries via alloca,<br/> and let outer-products makes a good use of it.
11087b91c11207 # Bug 1578: Improve prefetching in matrix multiplication on MIPS.
1153aa110e681b # PR 526: Speed up multiplication of small, dynamically sized matrices
11544ad359237a # Vectorize row-by-row gebp loop iterations on 16 packets as well
1157a476054879 # Bug 1624: improve matrix-matrix product on ARM 64, 20% speedup
1160a4159dba08 # do not read buffers out of bounds
1163c53eececb0 # Implement AVX512 vectorization of std::complex<float/double>
11644e7746fe22 # Bug 1636: fix gemm performance issue with gcc>=6 and no FMA
1164956678a4ef # Bug 1515: disable gebp's 3pX4 micro kernel<br/> for MSVC<=19.14 because of register spilling.
1165426bce7529 # fix EIGEN_GEBP_2PX4_SPILLING_WORKAROUND<br/> for non vectorized type, and non x86/64 target
11660d90637838 # enable spilling workaround on architectures with SSE/AVX
1166f159cf3d75 # Artificially increase l1-blocking size for AVX512.<br/> +10% speedup with current kernels.
11686dd93f7e3b # Make code compile again for older compilers.
1175dbfcceabf5 # Bug: 1633: refactor gebp kernel and optimize for neon
117670e133333d # Bug 1661: fix regression in GEBP and AVX512
11760f028f61cb # GEBP: cleanup logic to choose between<br/> a 4 packets of 1 packet (=e118ce86fd+fix)
1180de77bf5d6c # gebp: Add new ½ and ¼ packet rows per (peeling) round on the lhs

View File

@@ -0,0 +1,12 @@
#include "gemm_common.h"
// Benchmarked kernel: accumulates the matrix product A * B into C.
// The destination is wrapped in noalias() before the += assignment.
// NOTE(review): EIGEN_DONT_INLINE presumably keeps this call out of line so
// that it is timed as a real function call -- confirm against Eigen's macros.
EIGEN_DONT_INLINE
void gemm(const Mat &A, const Mat &B, Mat &C)
{
C.noalias() += A * B;
}
// Program entry point: forwards the command line together with the gemm
// kernel to main_gemm and uses its result as the exit status.
int main(int argc, char **argv)
{
  const int status = main_gemm(argc, argv, gemm);
  return status;
}

View File

@@ -0,0 +1,67 @@
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include "eigen_src/Eigen/Core"
#include "../BenchTimer.h"
using namespace Eigen;

// The scalar type under test must be supplied on the compiler command line
// (run.sh passes -DSCALAR=...); refuse to build without it.
#if !defined(SCALAR)
#error SCALAR must be defined
#endif

// Element type and the dynamically sized matrix type used by the benchmarks.
using Scalar = SCALAR;
using Mat = Matrix<Scalar,Dynamic,Dynamic>;
// Times f(A,B,C) for an m x k matrix A, a k x n matrix B (both random) and an
// m x n matrix C (initially zero).
// Returns 1e-9 * rep * flops / best measured time, where flops = 2*m*n*k and
// rep is the number of calls per timed try.
template<typename Func>
EIGEN_DONT_INLINE
double bench(long m, long n, long k, const Func& f)
{
  Mat A(m,k);
  Mat B(k,n);
  Mat C(m,n);
  A.setRandom();
  B.setRandom();
  C.setZero();

  // Work budget and bounds on the number of tries; complex scalars get a
  // quarter of the budget and a lower range of tries.
  const bool is_complex = NumTraits<Scalar>::IsComplex;
  const double base_budget = 1e8*4/sizeof(Scalar);
  const double budget    = is_complex ? base_budget/4 : base_budget;
  const double min_tries = is_complex ? 2 : 4;
  const double max_tries = is_complex ? 4 : 10;

  const double flops = 2. * m * n * k;
  const double ratio = budget/flops;

  // Both values are clamped as doubles and then truncated to long.
  const long rep   = std::max(1., std::min(100., ratio) );
  const long tries = std::max(min_tries, std::min(max_tries, ratio) );

  BenchTimer timer;
  BENCH(timer, tries, rep, f(A,B,C));
  return 1e-9 * rep * flops / timer.best();
}
// Driver shared by the benchmarks whose kernel takes (A, B, C) matrices.
//
// Reads whitespace-separated "m n k" triples from the settings file
// (argv[1] when given, "gemm_settings.txt" otherwise), runs bench() on each
// triple with kernel f and prints all results as a single row on stdout.
//
// Returns 0 on success, or 1 after printing a message on stderr when the
// settings file cannot be opened (stdout stays empty in that case).
template<typename Func>
int main_gemm(int argc, char **argv, const Func& f)
{
  std::string filename = std::string("gemm_settings.txt");
  if(argc>1)
    filename = std::string(argv[1]);

  std::ifstream settings(filename);
  if(!settings.is_open())
  {
    // Make a missing or unreadable settings file visible instead of
    // silently producing an empty result row.
    std::cerr << "Cannot open settings file '" << filename << "'" << std::endl;
    return 1;
  }

  std::vector<double> results;
  long m, n, k;
  while(settings >> m >> n >> k)
  {
    //std::cerr << " Testing " << m << " " << n << " " << k << std::endl;
    results.push_back( bench(m, n, k, f) );
  }
  std::cout << RowVectorXd::Map(results.data(), results.size());
  return 0;
}

View File

@@ -0,0 +1,15 @@
8 8 8
9 9 9
24 24 24
239 239 239
240 240 240
2400 24 24
24 2400 24
24 24 2400
24 2400 2400
2400 24 2400
2400 2400 24
2400 2400 64
4800 23 160
23 4800 160
2400 2400 2400

View File

@@ -0,0 +1,11 @@
8 8 8
9 9 9
12 12 12
15 15 15
16 16 16
24 24 24
102 102 102
239 239 239
240 240 240
2400 2400 2400
2463 2463 2463

View File

@@ -0,0 +1,12 @@
#include "gemv_common.h"
// Benchmarked kernel: accumulates the matrix-vector product A * B into C.
// The destination is wrapped in noalias() before the += assignment.
// NOTE(review): EIGEN_DONT_INLINE presumably keeps this call out of line so
// that it is timed as a real function call -- confirm against Eigen's macros.
EIGEN_DONT_INLINE
void gemv(const Mat &A, const Vec &B, Vec &C)
{
C.noalias() += A * B;
}
// Program entry point: forwards the command line together with the gemv
// kernel to main_gemv and uses its result as the exit status.
int main(int argc, char **argv)
{
  const int status = main_gemv(argc, argv, gemv);
  return status;
}

View File

@@ -0,0 +1,69 @@
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <functional>
#include "eigen_src/Eigen/Core"
#include "../BenchTimer.h"
using namespace Eigen;

// The scalar type under test must be supplied on the compiler command line
// (run.sh passes -DSCALAR=...); refuse to build without it.
#if !defined(SCALAR)
#error SCALAR must be defined
#endif

// Element type plus the dynamically sized matrix and column-vector types
// used by the matrix-vector benchmarks.
using Scalar = SCALAR;
using Mat = Matrix<Scalar,Dynamic,Dynamic>;
using Vec = Matrix<Scalar,Dynamic,1>;
// Times f(A,B,C) for an m x n matrix A, a vector B of size n and a vector C
// of size m, all filled with random values.
// Returns 1e-9 * rep * flops / best measured time, where flops = 2*m*n and
// rep is the number of calls per timed try.
template<typename Func>
EIGEN_DONT_INLINE
double bench(long m, long n, Func &f)
{
  Mat A(m,n);
  Vec B(n);
  Vec C(m);
  A.setRandom();
  B.setRandom();
  C.setRandom();

  // Work budget and bounds on the number of tries; complex scalars get a
  // quarter of the budget and a lower range of tries.
  const bool is_complex = NumTraits<Scalar>::IsComplex;
  const double base_budget = 1e8/sizeof(Scalar);
  const double budget    = is_complex ? base_budget/4 : base_budget;
  const double min_tries = is_complex ? 2 : 4;
  const double max_tries = is_complex ? 4 : 10;

  const double flops = 2. * m * n;
  const double ratio = budget/flops;

  // Both values are clamped as doubles and then truncated to long.
  const long rep   = std::max(1., std::min(100., ratio) );
  const long tries = std::max(min_tries, std::min(max_tries, ratio) );

  BenchTimer timer;
  BENCH(timer, tries, rep, f(A,B,C));
  return 1e-9 * rep * flops / timer.best();
}
// Driver shared by the benchmarks whose kernel takes (matrix, vector, vector).
//
// Reads whitespace-separated "m n" pairs from the settings file (argv[1] when
// given, "gemv_settings.txt" otherwise), runs bench() on each pair with
// kernel f and prints all results as a single row on stdout.
//
// Returns 0 on success, or 1 after printing a message on stderr when the
// settings file cannot be opened (stdout stays empty in that case).
template<typename Func>
int main_gemv(int argc, char **argv, Func& f)
{
  std::string filename = std::string("gemv_settings.txt");
  if(argc>1)
    filename = std::string(argv[1]);

  std::ifstream settings(filename);
  if(!settings.is_open())
  {
    // Make a missing or unreadable settings file visible instead of
    // silently producing an empty result row.
    std::cerr << "Cannot open settings file '" << filename << "'" << std::endl;
    return 1;
  }

  std::vector<double> results;
  long m, n;
  while(settings >> m >> n)
  {
    //std::cerr << " Testing " << m << " " << n << std::endl;
    results.push_back( bench(m, n, f) );
  }
  std::cout << RowVectorXd::Map(results.data(), results.size());
  return 0;
}

View File

@@ -0,0 +1,11 @@
8 8
9 9
24 24
239 239
240 240
2400 24
24 2400
24 240
2400 2400
4800 23
23 4800

View File

@@ -0,0 +1,13 @@
8 8
9 9
12 12
15 15
16 16
24 24
53 53
74 74
102 102
239 239
240 240
2400 2400
2463 2463

View File

@@ -0,0 +1,12 @@
#include "gemv_common.h"
// Benchmarked kernel: accumulates the product of A transposed with C into B.
// Unlike the non-transposed variant, the second argument (B) is the output
// and the third (C) is the read-only input.
// The destination is wrapped in noalias() before the += assignment.
EIGEN_DONT_INLINE
void gemv(const Mat &A, Vec &B, const Vec &C)
{
B.noalias() += A.transpose() * C;
}
// Program entry point: forwards the command line together with the
// transposed gemv kernel to main_gemv and uses its result as the exit status.
int main(int argc, char **argv)
{
  const int status = main_gemv(argc, argv, gemv);
  return status;
}

View File

@@ -0,0 +1,101 @@
#include <iostream>
#include <fstream>
#include <vector>
#include <Eigen/Core>
#include "../../BenchTimer.h"
using namespace Eigen;

// The scalar type under test must be supplied on the compiler command line
// (run.sh passes -DSCALAR=...); refuse to build without it.
#if !defined(SCALAR)
#error SCALAR must be defined
#endif

// Element type of every matrix used in this benchmark.
using Scalar = SCALAR;
// Benchmarked kernel: accumulates A.lazyProduct(B) into C, with the
// destination wrapped in noalias() before the += assignment.
// The commented-out escape() calls are left in place as disabled code.
template<typename MatA, typename MatB, typename MatC>
EIGEN_DONT_INLINE
void lazy_gemm(const MatA &A, const MatB &B, MatC &C)
{
// escape((void*)A.data());
// escape((void*)B.data());
C.noalias() += A.lazyProduct(B);
// escape((void*)C.data());
}
// Times lazy_gemm on fixed-size operands: A is m x k with storage option TA,
// B is k x n and C is m x n (A and B random, C initially zero).
// Returns 1e-9 * rep * flops / best measured time, where flops = 2*m*n*k and
// rep is the number of calls per timed try.
template<int m, int n, int k, int TA>
EIGEN_DONT_INLINE
double bench()
{
  using MatA = Matrix<Scalar,m,k,TA>;
  using MatB = Matrix<Scalar,k,n>;
  using MatC = Matrix<Scalar,m,n>;

  MatA A(m,k);
  MatB B(k,n);
  MatC C(m,n);
  A.setRandom();
  B.setRandom();
  C.setZero();

  // Work budget and bounds on the number of tries.
  const double budget = 1e7*4/sizeof(Scalar);
  const double min_tries = 10;
  const double max_tries = 20;

  const double flops = 2. * m * n * k;
  const double ratio = budget/flops;

  // Both values are clamped as doubles and then truncated to long.
  const long rep   = std::max(10., std::min(10000., ratio) );
  const long tries = std::max(min_tries, std::min(max_tries, ratio) );

  BenchTimer timer;
  BENCH(timer, tries, rep, lazy_gemm(A,B,C));
  return 1e-9 * rep * flops / timer.best();
}
// Runs the m x n x k benchmark with the storage option of A chosen at run
// time: RowMajor when t is non-zero, option value 0 otherwise.
template<int m, int n, int k>
double bench_t(int t)
{
  return t ? bench<m,n,k,RowMajor>()
           : bench<m,n,k,0>();
}
// Maps run-time sizes to one of the compiled fixed-size benchmarks.
//
// Only cubic problems with m == n == k in [1, 12] are instantiated; any other
// combination returns 0 without running anything. t is forwarded to bench_t,
// where a non-zero value selects RowMajor storage for A.
//
// The sizes are compared directly rather than packed into a single integer
// id: a packed id such as m*10000 + n*100 + k is not unique (for instance
// 9,109,110 and 10,10,10 both give 101010), which would run the wrong
// benchmark for some non-cubic inputs.
EIGEN_DONT_INLINE
double bench_mnk(int m, int n, int k, int t)
{
  if(m != n || n != k)
    return 0;

  switch(m) {
    case  1 : return bench_t< 1, 1, 1>(t);
    case  2 : return bench_t< 2, 2, 2>(t);
    case  3 : return bench_t< 3, 3, 3>(t);
    case  4 : return bench_t< 4, 4, 4>(t);
    case  5 : return bench_t< 5, 5, 5>(t);
    case  6 : return bench_t< 6, 6, 6>(t);
    case  7 : return bench_t< 7, 7, 7>(t);
    case  8 : return bench_t< 8, 8, 8>(t);
    case  9 : return bench_t< 9, 9, 9>(t);
    case 10 : return bench_t<10,10,10>(t);
    case 11 : return bench_t<11,11,11>(t);
    case 12 : return bench_t<12,12,12>(t);
  }
  return 0;
}
// Program entry point of the lazy-product benchmark.
//
// Reads whitespace-separated "m n k t" records from the settings file
// (argv[1] when given, "lazy_gemm_settings.txt" otherwise), runs bench_mnk on
// each record and prints all results as a single row on stdout.
//
// Returns 0 on success, or 1 after printing a message on stderr when the
// settings file cannot be opened (stdout stays empty in that case).
int main(int argc, char **argv)
{
  std::string filename = std::string("lazy_gemm_settings.txt");
  if(argc>1)
    filename = std::string(argv[1]);

  std::ifstream settings(filename);
  if(!settings.is_open())
  {
    // Make a missing or unreadable settings file visible instead of
    // silently producing an empty result row.
    std::cerr << "Cannot open settings file '" << filename << "'" << std::endl;
    return 1;
  }

  std::vector<double> results;
  long m, n, k, t;
  while(settings >> m >> n >> k >> t)
  {
    //std::cerr << " Testing " << m << " " << n << " " << k << std::endl;
    results.push_back( bench_mnk(m, n, k, t) );
  }
  std::cout << RowVectorXd::Map(results.data(), results.size());
  return 0;
}

View File

@@ -0,0 +1,15 @@
1 1 1 0
2 2 2 0
3 3 3 0
4 4 4 0
4 4 4 1
5 5 5 0
6 6 6 0
7 7 7 0
7 7 7 1
8 8 8 0
9 9 9 0
10 10 10 0
11 11 11 0
12 12 12 0
12 12 12 1

View File

@@ -0,0 +1,15 @@
#include "gemm_common.h"
#include <Eigen/Cholesky>
// Benchmarked kernel: copies A into C, adds 1000 to every diagonal entry of C
// and then runs the blocked in-place lower LLT routine on C.
// B is not used; the three-argument signature matches the f(A,B,C) call made
// by the shared gemm driver.
// NOTE(review): the diagonal shift presumably keeps the random matrix well
// conditioned for the factorization -- confirm.
EIGEN_DONT_INLINE
void llt(const Mat &A, const Mat &B, Mat &C)
{
C = A;
C.diagonal().array() += 1000;
Eigen::internal::llt_inplace<Mat::Scalar, Lower>::blocked(C);
}
// Program entry point: forwards the command line together with the llt
// kernel to main_gemm and uses its result as the exit status.
int main(int argc, char **argv)
{
  const int status = main_gemm(argc, argv, llt);
  return status;
}

View File

@@ -0,0 +1,112 @@
#!/bin/bash

# Usage: make_plot.sh WHAT bench settings_file
#
#   WHAT           base name of the bench: reads WHAT.out and generates
#                  WHAT.pdf, WHAT.png and WHAT.html
#   bench          bench name (stored but not otherwise used here)
#   settings_file  one problem size per line; every non-empty line labels one
#                  data column of WHAT.out
#
# Also reads changesets.txt, resources/chart_header.html and
# resources/chart_footer.html from the current directory.

WHAT=$1
bench=$2
settings_file=$3

# Build the gnuplot column header ("rev" followed by one quoted label per
# non-empty settings line) and count the data columns at the same time.
# Empty lines are skipped and not counted so that labels stay aligned with
# the data columns.
header="rev "
col=0
while read line
do
  if [ -n "$line" ]; then
    header="$header \"$line\""
    ((col++))
  fi
done < "$settings_file"

echo $header > "$WHAT.out.header"
cat "$WHAT.out" >> "$WHAT.out.header"

# Generate the gnuplot script: one curve per data column (columns 2..col+1).
echo "set title '$WHAT'" > "$WHAT.gnuplot"
echo "set key autotitle columnhead outside " >> "$WHAT.gnuplot"
echo "set xtics rotate 1" >> "$WHAT.gnuplot"

echo "set term pdf color rounded enhanced fontscale 0.35 size 7in,5in" >> "$WHAT.gnuplot"
echo "set output '$WHAT.pdf'" >> "$WHAT.gnuplot"

echo "plot for [col=2:$col+1] '$WHAT.out.header' using 0:col:xticlabels(1) with lines" >> "$WHAT.gnuplot"
echo " " >> "$WHAT.gnuplot"

gnuplot -persist < "$WHAT.gnuplot"

# generate a png file (thumbnail)
convert -colors 256 -background white -density 300 -resize 300 -quality 0 "$WHAT.pdf" -background white -flatten "$WHAT.png"

# clean
rm "$WHAT.out.header" "$WHAT.gnuplot"


# generate html/svg graph

cat resources/chart_header.html > "$WHAT.html"
echo 'var customSettings = {"TITLE":"","SUBTITLE":"","XLABEL":"","YLABEL":""};' >> "$WHAT.html"
# 'data' is an array of datasets (i.e. curves), each of which is an object of the form
# {
#   key: <name of the curve>,
#   color: <optional color of the curve>,
#   values: [{
#     r: <revision number>,
#     v: <GFlops>
#   }]
# }
echo 'var data = [' >> "$WHAT.html"

# One curve per non-empty settings line; 'col' is the matching field of
# WHAT.out (field 1 is the revision) and 'i' the index of the result line.
col=2
while read line
do
  if [ -n "$line" ]; then
    echo '{"key":"'$line'","values":[' >> "$WHAT.html"
    i=0
    while read line2
    do
      if [ -n "$line2" ]; then
        val=`echo $line2 | cut -s -f $col -d ' '`
        if [ -n "$val" ]; then # skip build failures
          echo '{"r":'$i',"v":'$val'},' >> "$WHAT.html"
        fi
        ((i++))
      fi
    done < "$WHAT.out"
    echo ']},' >> "$WHAT.html"
    ((col++))
  fi
done < "$settings_file"
echo '];' >> "$WHAT.html"

# Revision identifiers (first field of each result line), indexed like 'r'.
echo 'var changesets = [' >> "$WHAT.html"
while read line2
do
  if [ -n "$line2" ]; then
    echo '"'`echo $line2 | cut -f 1 -d ' '`'",' >> "$WHAT.html"
  fi
done < "$WHAT.out"
echo '];' >> "$WHAT.html"

# Per-revision description: the text after '#' on the changesets.txt line
# that contains ":<revision>".
# NOTE(review): this lookup only matches entries written with a ':' before
# the revision -- confirm against the current changesets.txt format.
echo 'var changesets_details = [' >> "$WHAT.html"
while read line2
do
  if [ -n "$line2" ]; then
    num=`echo "$line2" | cut -f 1 -d ' '`
    comment=`grep ":$num" changesets.txt | cut -f 2 -d '#'`
    echo '"'"$comment"'",' >> "$WHAT.html"
  fi
done < "$WHAT.out"
echo '];' >> "$WHAT.html"

# Tick positions 0..N-1, one per result line.
echo 'var changesets_count = [' >> "$WHAT.html"
i=0
while read line2
do
  if [ -n "$line2" ]; then
    echo $i ',' >> "$WHAT.html"
    ((i++))
  fi
done < "$WHAT.out"
echo '];' >> "$WHAT.html"

cat resources/chart_footer.html >> "$WHAT.html"

View File

@@ -0,0 +1,41 @@
/* setup the chart and its options */
var chart = nv.models.lineChart()
.color(d3.scale.category10().range())
.margin({left: 75, bottom: 100})
.forceX([0]).forceY([0]);
chart.x(function(datum){ return datum.r; })
.xAxis.options({
axisLabel: customSettings.XLABEL || 'Changeset',
tickFormat: d3.format('.0f')
});
chart.xAxis
.tickValues(changesets_count)
.tickFormat(function(d){return changesets[d]})
.rotateLabels(-90);
chart.y(function(datum){ return datum.v; })
.yAxis.options({
axisLabel: customSettings.YLABEL || 'GFlops'/*,
tickFormat: function(val){ return d3.format('.0f')(val) + ' GFlops'; }*/
});
chart.tooltip.headerFormatter(function(d) { return changesets[d]
+ ' <p style="font-weight:normal;text-align: left;">'
+ changesets_details[d] + "</p>"; });
//chart.useInteractiveGuideline(true);
d3.select('#chart').datum(data).call(chart);
var plot = d3.select('#chart > g');
/* setup the title */
plot.append('text')
.style('font-size', '24px')
.attr('text-anchor', 'middle').attr('x', '50%').attr('y', '20px')
.text(customSettings.TITLE || '');
/* ensure the chart is responsive */
nv.utils.windowResize(chart.update);
</script>
</body>
</html>

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,3 @@
</table>
</body>
</html>

View File

@@ -0,0 +1,42 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Eigen performance monitoring</title>
<style type="text/css">
body
{
background:#fff;
}
th {
}
img
{
width:auto;
box-shadow:0px 0px 20px #cecece;
margin: 20px 20px 20px 20px;
-moz-transform: scale(1);
-moz-transition-duration: 0.4s;
-webkit-transition-duration: 0.4s;
-webkit-transform: scale(1);
-ms-transform: scale(1);
-ms-transition-duration: 0.4s;
}
img:hover
{
box-shadow: 5px 5px 20px #dcdcdc;
-moz-transform: scale(1.1);
-moz-transition-duration: 0.4s;
-webkit-transition-duration: 0.4s;
-webkit-transform: scale(1.1);
-ms-transform: scale(1.1);
-ms-transition-duration: 0.4s;
}
</style>
</head>
<body>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,183 @@
#!/bin/bash
# ./run.sh gemm gemm_settings.txt
# ./run.sh lazy_gemm lazy_gemm_settings.txt
# ./run.sh gemv gemv_settings.txt
# ./run.sh trmv_up gemv_square_settings.txt
# ...
# Examples of environment variables to be set:
# PREFIX="haswell-fma-"
# CXX_FLAGS="-mfma"
# CXX=clang++
# Options:
# -up : enforce the recomputation of existing data, and keep best results as a merging strategy
# -s : recompute selected changesets only and keep bests
# -np : no plotting of results, just generate the data
# Positional arguments: bench name and the settings file listing the sizes.
bench=$1
settings_file=$2

# Option flags are detected anywhere in the argument list.
if [[ "$*" =~ '-up' ]]; then
  update=true
else
  update=false
fi

if [[ "$*" =~ '-s' ]]; then
  selected=true
else
  selected=false
fi

if [[ "$*" =~ '-np' ]]; then
  do_plot=false
else
  do_plot=true
fi

# Results go to the directory named by PREFIX, or to "default" when PREFIX is
# unset or empty. (":-" substitutes the default; ":?" would abort the script
# with an error instead.)
WORKING_DIR=${PREFIX:-default}

# Output files are additionally prefixed with "$PREFIX-" when PREFIX is set.
if [ -z "$PREFIX" ]; then
  WORKING_DIR_PREFIX="$WORKING_DIR/"
else
  WORKING_DIR_PREFIX="$WORKING_DIR/$PREFIX-"
fi
echo "WORKING_DIR_PREFIX=$WORKING_DIR_PREFIX"
mkdir -p "$WORKING_DIR"

# Full argument list, searched later for explicitly selected revisions.
global_args="$*"

if $selected ; then
  echo "Recompute selected changesets only and keep bests"
elif $update ; then
  echo "(Re-)Compute all changesets and keep bests"
else
  echo "Skip previously computed changesets"
fi

# Fetch the Eigen sources on first use, refresh them otherwise.
if [ ! -d "eigen_src" ]; then
  git clone https://gitlab.com/libeigen/eigen.git eigen_src
else
  cd eigen_src
  git pull
  cd ..
fi

# Default compiler.
if [ -z "$CXX" ]; then
  CXX=g++
fi
# make_backup BASE
# Renames BASE.out to BASE.backup when BASE.out is a regular file; does
# nothing otherwise.
function make_backup
{
  if [ ! -f "$1.out" ]; then
    return 0
  fi
  mv "$1.out" "$1.backup"
}
# merge "NEW VALUES" "PREVIOUS VALUES"
# Both arguments are whitespace-separated lists of numbers. When they hold the
# same number of words, prints the element-wise maximum (each comparison is
# delegated to bc); otherwise prints the first list as is.
function merge
{
  a=( $1 )
  b=( $2 )

  # Lists of different lengths cannot be combined: keep the new values.
  if [ ${#a[@]} != ${#b[@]} ]; then
    echo $1
    return
  fi

  res=""
  for i in "${!a[@]}"; do
    tmp=$(echo "if (${a[$i]} > ${b[$i]}) ${a[$i]} else ${b[$i]} " | bc -l)
    res="$res $tmp"
  done
  echo $res
}
# test_current REV SCALAR NAME
# Benchmarks the currently checked-out Eigen sources for one scalar type and
# appends a "REV results..." line to NAME.out.
#   REV     revision identifier recorded in the output
#   SCALAR  value passed to the compiler as -DSCALAR
#   NAME    path of the executable to build; also the base name of the
#           .out / .backup result files
# Reads the globals settings_file, global_args, update, selected, bench, CXX
# and CXX_FLAGS. All variables assigned here are global as well.
function test_current
{
rev=$1
scalar=$2
name=$3
# Results recorded for this revision by a previous run, if any (everything
# after the first field of the matching backup line).
prev=""
if [ -e "$name.backup" ]; then
prev=`grep $rev "$name.backup" | cut -d ' ' -f 2-`
fi
res=$prev
# Number of previous values vs. number of lines in the settings file.
count_rev=`echo $prev | wc -w`
count_ref=`cat $settings_file | wc -l`
# Was this revision named on the command line?
if echo "$global_args" | grep "$rev" > /dev/null; then
rev_found=true
else
rev_found=false
fi
# echo $update et $selected et $rev_found because $rev et "$global_args"
# echo $count_rev et $count_ref
# Run when an update is forced, when the previous results do not have one
# value per settings line, or when the revision was explicitly selected.
if $update || [ $count_rev != $count_ref ] || ( $selected && $rev_found ); then
echo "RUN: $CXX -O3 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src $bench.cpp -DSCALAR=$scalar -o $name"
if $CXX -O3 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src $bench.cpp -DSCALAR=$scalar -o $name; then
curr=`./$name $settings_file`
if [ $count_rev == $count_ref ]; then
echo "merge previous $prev"
echo "with new $curr"
else
echo "got $curr"
fi
# Combine the new values with the previous ones via merge.
res=`merge "$curr" "$prev"`
# echo $res
echo "$rev $res" >> $name.out
else
# Nothing is written to NAME.out for this revision in that case.
echo "Compilation failed, skip rev $rev"
fi
else
# Previous results are complete: carry them over unchanged.
echo "Skip existing results for $rev / $name"
echo "$rev $res" >> $name.out
fi
}
# Move the result files of a previous run out of the way; they are read back
# by test_current as "<name>.backup".
make_backup "${WORKING_DIR_PREFIX}s${bench}"
make_backup "${WORKING_DIR_PREFIX}d${bench}"
make_backup "${WORKING_DIR_PREFIX}c${bench}"

# Iterate over changesets.txt: the part of each line before '#' is the
# revision; lines without any alphanumeric character there are dropped.
cut -f1 -d"#" < changesets.txt | grep -E '[[:alnum:]]' | while read rev
do
  if [ -n "$rev" ]; then
    echo "Testing rev $rev"
    # Keep only the part after ':' when the entry has the form "a:b".
    rev2=`echo $rev | cut -f 2 -d':'`
    echo "Testing rev $rev, $rev2"
    cd eigen_src
    git checkout $rev2 > /dev/null
    # Record results under the revision that is actually checked out.
    actual_rev=`git rev-parse --short HEAD`
    cd ..

    test_current $actual_rev float                  "${WORKING_DIR_PREFIX}s${bench}"
    test_current $actual_rev double                 "${WORKING_DIR_PREFIX}d${bench}"
    test_current $actual_rev "std::complex<double>" "${WORKING_DIR_PREFIX}c${bench}"
  fi

done

# Dump the collected results.
echo "Float:"
cat "${WORKING_DIR_PREFIX}s${bench}.out"
echo " "

echo "Double:"
cat "${WORKING_DIR_PREFIX}d${bench}.out"
echo ""

echo "Complex:"
cat "${WORKING_DIR_PREFIX}c${bench}.out"
echo ""

# Generate the plots unless -np was given.
if $do_plot ; then

  ./make_plot.sh "${WORKING_DIR_PREFIX}s${bench}" $bench $settings_file
  ./make_plot.sh "${WORKING_DIR_PREFIX}d${bench}" $bench $settings_file
  ./make_plot.sh "${WORKING_DIR_PREFIX}c${bench}" $bench $settings_file

fi

View File

@@ -0,0 +1,72 @@
#!/bin/bash
# ./runall.sh "Title"
# Examples of environment variables to be set:
# PREFIX="haswell-fma-"
# CXX_FLAGS="-mfma"
# CXX=clang++
# Options:
# -up : enforce the recomputation of existing data, and keep best results as a merging strategy
# -s : recompute selected changesets only and keep bests
# -np : no plotting of results, just generate the data
# Plotting is on unless -np appears in the arguments.
if [[ "$*" =~ '-np' ]]; then
  do_plot=false
else
  do_plot=true
fi

# Run every benchmark with its settings file, forwarding all command-line
# arguments to run.sh.
./run.sh gemm gemm_settings.txt "$@"
./run.sh lazy_gemm lazy_gemm_settings.txt "$@"
./run.sh gemv gemv_settings.txt "$@"
./run.sh gemvt gemv_settings.txt "$@"
./run.sh trmv_up gemv_square_settings.txt "$@"
./run.sh trmv_lo gemv_square_settings.txt "$@"
./run.sh trmv_upt gemv_square_settings.txt "$@"
./run.sh trmv_lot gemv_square_settings.txt "$@"
./run.sh llt gemm_square_settings.txt "$@"

if $do_plot ; then

  # generate html file

  # print_td TYPE_LETTER BENCH TITLE
  # Appends one table cell linking "$PREFIX-<letter><bench>.html" through its
  # png thumbnail.
  function print_td {
    echo '<td><a href="'$PREFIX'-'$1"$2"'.html"><img src="'$PREFIX'-'$1"$2"'.png" title="'$3'"></a></td>' >> $htmlfile
  }

  # print_tr BENCH LABEL
  # Appends a heading row with LABEL followed by a row holding the float,
  # double and complex cells of BENCH.
  function print_tr {
    echo '<tr><th colspan="3">'"$2"'</th></tr>' >> $htmlfile
    echo '<tr>' >> $htmlfile
    print_td s $1 float
    print_td d $1 double
    print_td c $1 complex
    echo '</tr>' >> $htmlfile
  }

  # The index page is only written when PREFIX is set; $1 is used as its title.
  if [ -n "$PREFIX" ]; then

    cp resources/s1.js $PREFIX/
    cp resources/s2.js $PREFIX/

    htmlfile="$PREFIX/index.html"
    cat resources/header.html > $htmlfile

    echo '<h1>'$1'</h1>' >> $htmlfile
    echo '<table>' >> $htmlfile
    print_tr gemm 'C += A &middot; B &nbsp; (gemm)'
    print_tr lazy_gemm 'C += A &middot; B &nbsp; (gemm lazy)'
    print_tr gemv 'y += A &middot; x &nbsp; (gemv)'
    print_tr gemvt 'y += A<sup>T</sup> &middot; x &nbsp; (gemv)'
    print_tr trmv_up 'y += U &middot; x &nbsp; (trmv)'
    print_tr trmv_upt 'y += U<sup>T</sup> &middot; x &nbsp; (trmv)'
    print_tr trmv_lo 'y += L &middot; x &nbsp; (trmv)'
    print_tr trmv_lot 'y += L<sup>T</sup> &middot; x &nbsp; (trmv)'
    # The Cholesky row must point at the llt results, not at trmv_lot again.
    print_tr llt 'L &middot; L<sup>T</sup> = A &nbsp; (Cholesky,potrf)'

    cat resources/footer.html >> $htmlfile
  fi
fi

View File

@@ -0,0 +1,12 @@
#include "gemv_common.h"
// Benchmarked kernel: accumulates the product of the lower triangular view of
// A with B into C. The destination is wrapped in noalias() before the +=
// assignment.
EIGEN_DONT_INLINE
void trmv(const Mat &A, const Vec &B, Vec &C)
{
C.noalias() += A.triangularView<Lower>() * B;
}
// Program entry point: forwards the command line together with the trmv
// kernel to main_gemv and uses its result as the exit status.
int main(int argc, char **argv)
{
  const int status = main_gemv(argc, argv, trmv);
  return status;
}

View File

@@ -0,0 +1,12 @@
#include "gemv_common.h"
// Benchmarked kernel: accumulates the product of the lower triangular view of
// A transposed with C into B. Here the second argument (B) is the output and
// the third (C) is the read-only input. The destination is wrapped in
// noalias() before the += assignment.
EIGEN_DONT_INLINE
void trmv(const Mat &A, Vec &B, const Vec &C)
{
B.noalias() += A.transpose().triangularView<Lower>() * C;
}
// Program entry point: forwards the command line together with the trmv
// kernel to main_gemv and uses its result as the exit status.
int main(int argc, char **argv)
{
  const int status = main_gemv(argc, argv, trmv);
  return status;
}

View File

@@ -0,0 +1,12 @@
#include "gemv_common.h"
// Benchmarked kernel: accumulates the product of the upper triangular view of
// A with B into C. The destination is wrapped in noalias() before the +=
// assignment.
EIGEN_DONT_INLINE
void trmv(const Mat &A, const Vec &B, Vec &C)
{
C.noalias() += A.triangularView<Upper>() * B;
}
// Program entry point: forwards the command line together with the trmv
// kernel to main_gemv and uses its result as the exit status.
int main(int argc, char **argv)
{
  const int status = main_gemv(argc, argv, trmv);
  return status;
}

View File

@@ -0,0 +1,12 @@
#include "gemv_common.h"
// Benchmarked kernel: accumulates the product of the upper triangular view of
// A transposed with C into B. Here the second argument (B) is the output and
// the third (C) is the read-only input. The destination is wrapped in
// noalias() before the += assignment.
EIGEN_DONT_INLINE
void trmv(const Mat &A, Vec &B, const Vec &C)
{
B.noalias() += A.transpose().triangularView<Upper>() * C;
}
// Program entry point: forwards the command line together with the trmv
// kernel to main_gemv and uses its result as the exit status.
int main(int argc, char **argv)
{
  const int status = main_gemv(argc, argv, trmv);
  return status;
}