Commit 45b080b5 authored by Pierre Aubert's avatar Pierre Aubert
Browse files

Really poor vectorisation

parent fc271c6b
# Gnuplot script drawing the performance curves of the sequential Gray-Scott builds.
# Common setup : PNG output of 800x600, grid, legend outside the plot, logarithmic y axis.
set terminal png notransparent crop enhanced size 800,600 font "arial,14"
set grid xtics ytics mytics
set key out vert center top
set logscale y
# First figure : column 2 of each data file versus column 1 (number of elements),
# with error bars spanning column 2 -/+ column 4.
set xlabel "nb elements"
set ylabel "elapsed time per element [ns/el]"
set output "grayscott_seqVectorizeElapsedTimeCyEl.png"
plot "perf_grayscott_seq_O3.txt" using 1:2:($2-$4):($2+$4) title "perf grayscott seq O3" with yerrorlines lw 2,"perf_grayscott_seq_vectorize_O1.txt" using 1:2:($2-$4):($2+$4) title "perf grayscott seq vectorize O1" with yerrorlines lw 2,"perf_grayscott_seq_vectorize_O2.txt" using 1:2:($2-$4):($2+$4) title "perf grayscott seq vectorize O2" with yerrorlines lw 2,"perf_grayscott_seq_vectorize_O3.txt" using 1:2:($2-$4):($2+$4) title "perf grayscott seq vectorize O3" with yerrorlines lw 2,"perf_grayscott_seq_vectorize_Ofast.txt" using 1:2:($2-$4):($2+$4) title "perf grayscott seq vectorize Ofast" with yerrorlines lw 2,
# Second figure : column 3 of each data file versus column 1 (number of elements),
# with error bars spanning column 3 -/+ column 5.
set xlabel "nb elements"
set ylabel "elapsed time [ns]"
set output "grayscott_seqVectorizeElapsedTime.png"
plot "perf_grayscott_seq_O3.txt" using 1:3:($3-$5):($3+$5) title "perf grayscott seq O3" with yerrorlines lw 2,"perf_grayscott_seq_vectorize_O1.txt" using 1:3:($3-$5):($3+$5) title "perf grayscott seq vectorize O1" with yerrorlines lw 2,"perf_grayscott_seq_vectorize_O2.txt" using 1:3:($3-$5):($3+$5) title "perf grayscott seq vectorize O2" with yerrorlines lw 2,"perf_grayscott_seq_vectorize_O3.txt" using 1:3:($3-$5):($3+$5) title "perf grayscott seq vectorize O3" with yerrorlines lw 2,"perf_grayscott_seq_vectorize_Ofast.txt" using 1:3:($3-$5):($3+$5) title "perf grayscott seq vectorize Ofast" with yerrorlines lw 2,
10000 67.8324 678324 0.172832 1728.32
19881 67.9402 1.35072e+06 0.101376 2015.47
29929 67.9377 2.03331e+06 0.11689 3498.39
40000 71.6361 2.86544e+06 0.101214 4048.55
49952 71.7628 3.5847e+06 0.113118 5650.48
59780 72.0097 4.30474e+06 0.0969996 5798.64
79806 71.4927 5.70554e+06 0.0963849 7692.09
99856 71.5524 7.14494e+06 0.0753253 7521.68
10000 62.1077 621077 0.0584005 584.005
19881 61.9654 1.23193e+06 0.334577 6651.73
29929 62.1748 1.86083e+06 0.24052 7198.51
40000 66.2938 2.65175e+06 0.207202 8288.09
49952 65.5634 3.27502e+06 0.278079 13890.6
59780 65.765 3.93143e+06 0.131601 7867.14
79806 65.7433 5.24671e+06 0.12749 10174.5
99856 65.7315 6.56369e+06 0.0937255 9359.05
10000 64.5399 645399 0.217797 2177.97
19881 64.5406 1.28313e+06 0.22883 4549.37
29929 64.7557 1.93807e+06 0.229006 6853.91
40000 67.8632 2.71453e+06 0.0433138 1732.55
49952 67.8583 3.38966e+06 0.0941192 4701.44
59780 67.7785 4.0518e+06 0.0851416 5089.76
79806 68.0185 5.42829e+06 0.0385827 3079.13
99856 67.9429 6.78451e+06 0.0462766 4621
10000 64.1112 641112 0.0313317 313.317
19881 64.3169 1.27868e+06 0.0434065 862.966
29929 64.3947 1.92727e+06 0.0399282 1195.01
40000 67.2377 2.68951e+06 0.143486 5739.42
49952 67.2324 3.35839e+06 0.34806 17386.3
59780 67.1337 4.01325e+06 0.0268174 1603.14
79806 68.1601 5.43959e+06 1.09157 87113.8
99856 67.316 6.72191e+06 0.0202938 2026.46
...@@ -13,11 +13,15 @@ phoenix_compileAndRunExample(perf_grayscott_seq_O2 "-O2" "${CONFIG_GRAYSCOTT}" $ ...@@ -13,11 +13,15 @@ phoenix_compileAndRunExample(perf_grayscott_seq_O2 "-O2" "${CONFIG_GRAYSCOTT}" $
phoenix_compileAndRunExample(perf_grayscott_seq_O3 "-O3" "${CONFIG_GRAYSCOTT}" ${progNaiveSrc}) phoenix_compileAndRunExample(perf_grayscott_seq_O3 "-O3" "${CONFIG_GRAYSCOTT}" ${progNaiveSrc})
phoenix_compileAndRunExample(perf_grayscott_seq_Ofast "-Ofast" "${CONFIG_GRAYSCOTT}" ${progNaiveSrc}) phoenix_compileAndRunExample(perf_grayscott_seq_Ofast "-Ofast" "${CONFIG_GRAYSCOTT}" ${progNaiveSrc})
# phoenix_compileAndRunExample(perf_grayscott_seq_vectorize_O3 "-O3 -ftree-vectorize -march=native -mtune=native -mavx2" "${CONFIG_GRAYSCOTT}" ${progSrc}) set(progVectorizeSrc ${CMAKE_CURRENT_SOURCE_DIR}/../Vectorized/vectorized_propagation.cpp main_vectorized.cpp)
# phoenix_compileAndRunExample(perf_grayscott_seq_vectorize_Ofast "-Ofast -ftree-vectorize -march=native -mtune=native -mavx2" "${CONFIG_GRAYSCOTT}" ${progSrc})
phoenix_compileAndRunExample(perf_grayscott_seq_vectorize_O1 "-O1 -ftree-vectorize -march=native -mtune=native -mavx2" "${CONFIG_GRAYSCOTT}" ${progVectorizeSrc})
phoenix_compileAndRunExample(perf_grayscott_seq_vectorize_O2 "-O2 -ftree-vectorize -march=native -mtune=native -mavx2" "${CONFIG_GRAYSCOTT}" ${progVectorizeSrc})
phoenix_compileAndRunExample(perf_grayscott_seq_vectorize_O3 "-O3 -ftree-vectorize -march=native -mtune=native -mavx2" "${CONFIG_GRAYSCOTT}" ${progVectorizeSrc})
phoenix_compileAndRunExample(perf_grayscott_seq_vectorize_Ofast "-Ofast -ftree-vectorize -march=native -mtune=native -mavx2" "${CONFIG_GRAYSCOTT}" ${progVectorizeSrc})
phoenix_plotPerf("grayscott_seqBase" perf_grayscott_seq_O0 perf_grayscott_seq_O1 perf_grayscott_seq_O2 perf_grayscott_seq_O3 perf_grayscott_seq_Ofast) phoenix_plotPerf("grayscott_seqBase" perf_grayscott_seq_O0 perf_grayscott_seq_O1 perf_grayscott_seq_O2 perf_grayscott_seq_O3 perf_grayscott_seq_Ofast)
# phoenix_plotPerf("grayscott_seqVectorize" perf_saxpy_seq_O3 perf_saxpy_seq_vectorize_O3 perf_saxpy_seq_vectorize_Ofast) phoenix_plotPerf("grayscott_seqVectorize" perf_grayscott_seq_O3 perf_grayscott_seq_vectorize_O1 perf_grayscott_seq_vectorize_O2 perf_grayscott_seq_vectorize_O3 perf_grayscott_seq_vectorize_Ofast)
/***************************************
Auteur : Pierre Aubert
Mail : aubertp7@gmail.com
Licence : CeCILL-C
****************************************/
#include <cmath>
#include "micro_benchmark.h"
#include "temporary_alloc.h"
#include "vectorized_propagation.h"
///Benchmark one call of grayscott_propagation on a grid of about nbElement cells
/**	@param nbElement : requested number of elements of the tables
 * 	The grid has nbRow = floor(sqrt(nbElement)) rows and nbCol = nbElement/nbRow columns,
 * 	so the number of elements given to the benchmark (nbRow*nbCol) can be smaller than the requested one.
 * 	Nothing is evaluated if nbElement is 0 : there would be no row and nbElement/nbRow would divide by zero.
 * 	The timing and the printing are delegated to micro_benchmarkAutoNsPrint.
 * 	NOTE(review) : the name evaluateSaxpy is kept because main refers to it, but this function evaluates the Gray-Scott propagation.
*/
void evaluateSaxpy(size_t nbElement){
	size_t nbRow(std::sqrt(nbElement));
	//No row means nothing to evaluate, and the division below would be a division by zero
	if(nbRow == 0){return;}
	size_t nbCol(nbElement/nbRow);
	//Number of elements of the grid which is really used
	nbElement = nbRow*nbCol;

	PTensor<float> tmpInU, tmpInV, tmpOutU, tmpOutV;
	float *tmpU1 = NULL, *tmpU2 = NULL, *tmpV1 = NULL, *tmpV2 = NULL;
	allocate_temporary(tmpU1, tmpU2, tmpV1, tmpV2, tmpInU, tmpInV, tmpOutU, tmpOutV, nbRow, nbCol);

	//Parameters of the reaction
	float diffusionRateU(0.1f), diffusionRateV(0.05f);
	float killRate(0.054f), feedRate(0.014f), dt(1.0f);

	//3x3 stencil where all the coefficients are 1
	long nbStencilRow(3l), nbStencilCol(3l);
	float matDeltaSquare[] = {	1.0f, 1.0f, 1.0f,
					1.0f, 1.0f, 1.0f,
					1.0f, 1.0f, 1.0f};

	//tmpU2 and tmpV2 are the outputs, tmpU1 and tmpV1 are the inputs of grayscott_propagation
	micro_benchmarkAutoNsPrint("evaluate GrayScott reaction", nbElement, grayscott_propagation,
			tmpU2, tmpV2, tmpU1, tmpV1, nbRow, nbCol,
			matDeltaSquare, nbStencilRow, nbStencilCol,
			diffusionRateU, diffusionRateV, feedRate, killRate, dt);
}
///Entry point of the benchmark : gives the command line to micro_benchmarkParseArg
/**	@param argc : number of arguments passed to the program
 * 	@param argv : table of the arguments passed to the program
 * 	@return value returned by micro_benchmarkParseArg
*/
int main(int argc, char** argv){
	const int exitStatus = micro_benchmarkParseArg(argc, argv, evaluateSaxpy);
	return exitStatus;
}
project(Phoenix) project(Phoenix)
cmake_minimum_required(VERSION 2.8) cmake_minimum_required(VERSION 2.8)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/Naive ${CMAKE_CURRENT_SOURCE_DIR}/GrayScottDataFormat) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/Naive
${CMAKE_CURRENT_SOURCE_DIR}/Vectorized
${CMAKE_CURRENT_SOURCE_DIR}/GrayScottDataFormat)
add_subdirectory(Naive) add_subdirectory(Naive)
add_subdirectory(Vectorized)
add_subdirectory(GrayScottDataFormat) add_subdirectory(GrayScottDataFormat)
add_subdirectory(Program) add_subdirectory(Program)
add_subdirectory(GrayScott2Pic) add_subdirectory(GrayScott2Pic)
......
# Build script of the vectorized Gray-Scott propagation library.
project(Phoenix)
cmake_minimum_required(VERSION 2.8)
# Every .cpp file of this directory is a source of the library
file(GLOB mainSource "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
# Shared library gray_scott_vectorized
add_library(gray_scott_vectorized SHARED ${mainSource})
# Compilation flags of the library : -O3, tree vectorization, native architecture/tuning and AVX2
set_property(TARGET gray_scott_vectorized PROPERTY COMPILE_FLAGS "-O3 -ftree-vectorize -march=native -mtune=native -mavx2")
# The library is linked with the imported target TBB::tbb
target_link_libraries(gray_scott_vectorized TBB::tbb)
/***************************************
Auteur : Pierre Aubert
Mail : aubertp7@gmail.com
Licence : CeCILL-C
****************************************/
#include <algorithm>
#include "vectorized_propagation.h"
///Alignment (in bytes) which is assumed for the four matrices given to grayscott_propagation
#define FLOAT_VECTOR_ALIGNEMENT 32

///Propagate the U and V species in the matU and matV
/**	@param[out] poutMatU : updated matrix U version
 * 	@param[out] poutMatV : updated matrix V version
 * 	@param pmatU : input of matrix U
 * 	@param pmatV : input of matrix V
 * 	@param nbRow : number of rows of the matrices
 * 	@param nbCol : number of columns of the matrices
 * 	@param matDeltaSquare : matrix of the delta square values
 * 	@param nbStencilRow : number of rows of the matrix matDeltaSquare
 * 	@param nbStencilCol : number of columns of the matrix matDeltaSquare
 * 	@param diffudionRateU : diffusion rate of the U specie
 * 	@param diffusionRateV : diffusion rate of the V specie
 * 	@param feedRate : rate of the process which feeds U and drains U, V and P
 * 	@param killRate : rate of the process which converts V into P
 * 	@param dt : time interval between two steps
 * 
 * 	The four matrices are row-major tables of nbRow*nbCol floats. They are declared __restrict__ and
 * 	given to __builtin_assume_aligned, so they must not overlap and must be aligned on FLOAT_VECTOR_ALIGNEMENT bytes.
 * 	The stencil is centred on the current cell. Near a border, the part of the stencil which is outside of the matrix is ignored
 * 	and the remaining cells keep the coefficient they have in matDeltaSquare relative to the centre.
*/
void grayscott_propagation(float * __restrict__ poutMatU, float * __restrict__ poutMatV, const float * __restrict__ pmatU, const float * __restrict__ pmatV,
		long nbRow, long nbCol,
		const float * matDeltaSquare, long nbStencilRow, long nbStencilCol,
		float diffudionRateU, float diffusionRateV, float feedRate, float killRate, float dt)
{
	const float* matU = static_cast<const float*>(__builtin_assume_aligned(pmatU, FLOAT_VECTOR_ALIGNEMENT));
	const float* matV = static_cast<const float*>(__builtin_assume_aligned(pmatV, FLOAT_VECTOR_ALIGNEMENT));
	float* outMatU = static_cast<float*>(__builtin_assume_aligned(poutMatU, FLOAT_VECTOR_ALIGNEMENT));
	float* outMatV = static_cast<float*>(__builtin_assume_aligned(poutMatV, FLOAT_VECTOR_ALIGNEMENT));

	//Distance between the centre of the stencil and its first row/column
	const long offsetStencilRow((nbStencilRow - 1l)/2l);
	const long offsetStencilCol((nbStencilCol - 1l)/2l);

	for(long i(0l); i < nbRow; ++i){
		//Rows of the matrix covered by the stencil, clipped to the matrix
		const long firstRowStencil(std::max(i - offsetStencilRow, 0l));
		const long lastRowStencil(std::min(i + offsetStencilRow + 1l, nbRow));
		//Row of the stencil which matches firstRowStencil : it is not 0 if the stencil is clipped by the top border
		const long firstStencilIndexRow(firstRowStencil - (i - offsetStencilRow));
		for(long j(0l); j < nbCol; ++j){
			//Columns of the matrix covered by the stencil, clipped to the matrix
			const long firstColStencil(std::max(j - offsetStencilCol, 0l));
			const long lastColStencil(std::min(j + offsetStencilCol + 1l, nbCol));
			//Column of the stencil which matches firstColStencil : it is not 0 if the stencil is clipped by the left border
			const long firstStencilIndexCol(firstColStencil - (j - offsetStencilCol));

			const float u(matU[i*nbCol + j]), v(matV[i*nbCol + j]);
			//Weighted sum of the differences between the neighbours and the current cell
			float fullU(0.0f), fullV(0.0f);
			long stencilIndexRow(firstStencilIndexRow);
			for(long k(firstRowStencil); k < lastRowStencil; ++k){
				long stencilIndexCol(firstStencilIndexCol);
				for(long l(firstColStencil); l < lastColStencil; ++l){
					const float deltaSquare(matDeltaSquare[stencilIndexRow*nbStencilCol + stencilIndexCol]);
					fullU += (matU[k*nbCol + l] - u)*deltaSquare;
					fullV += (matV[k*nbCol + l] - v)*deltaSquare;
					++stencilIndexCol;
				}
				++stencilIndexRow;
			}
			//Reaction term and explicit update of both species
			const float uvSquare(u*v*v);
			const float du(diffudionRateU*fullU - uvSquare + feedRate*(1.0f - u));
			const float dv(diffusionRateV*fullV + uvSquare - (feedRate + killRate)*v);
			outMatU[i*nbCol + j] = u + du*dt;
			outMatV[i*nbCol + j] = v + dv*dt;
		}
	}
}
/***************************************
Auteur : Pierre Aubert
Mail : aubertp7@gmail.com
Licence : CeCILL-C
****************************************/
#ifndef __VECTORIZED_PROPAGATION_H__
#define __VECTORIZED_PROPAGATION_H__
#include <iostream>
///Propagate the U and V species of the Gray-Scott reaction for one time step
/**	@param[out] poutMatU : updated matrix U version
 * 	@param[out] poutMatV : updated matrix V version
 * 	@param pmatU : input of matrix U
 * 	@param pmatV : input of matrix V
 * 	@param nbRow : number of rows of the matrices
 * 	@param nbCol : number of columns of the matrices
 * 	@param matDeltaSquare : matrix of the delta square values
 * 	@param nbStencilRow : number of rows of the matrix matDeltaSquare
 * 	@param nbStencilCol : number of columns of the matrix matDeltaSquare
 * 	@param diffudionRateU : diffusion rate of the U specie
 * 	@param diffusionRateV : diffusion rate of the V specie
 * 	@param feedRate : rate of the process which feeds U and drains U, V and P
 * 	@param killRate : rate of the process which converts V into P
 * 	@param dt : time interval between two steps
 * 	The four matrix pointers are declared __restrict__ : they must not overlap.
*/
void grayscott_propagation(float * __restrict__ poutMatU, float * __restrict__ poutMatV, const float * __restrict__ pmatU, const float * __restrict__ pmatV,
long nbRow, long nbCol,
const float * matDeltaSquare, long nbStencilRow, long nbStencilCol,
float diffudionRateU, float diffusionRateV, float feedRate, float killRate, float dt);
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment