Commit 56a06b7c authored by Pierre Aubert's avatar Pierre Aubert
Browse files

Add base of gpu c++17 test and intrinsics version of Gray Scott

parent 45b080b5
......@@ -20,11 +20,25 @@ pull_extra_module("MicroBenchmark" "https://gitlab.in2p3.fr/CTA-LAPP/PHOENIX_LIB
pull_extra_module("TensorAlloc" "https://gitlab.in2p3.fr/CTA-LAPP/PHOENIX_LIBS/TensorAlloc.git")
pull_extra_module("PhoenixPNG" "https://gitlab.in2p3.fr/CTA-LAPP/PHOENIX_LIBS/PhoenixPNG.git")
pull_extra_module("IntrinsicsGenerator" "https://gitlab.in2p3.fr/CTA-LAPP/PHOENIX_LIBS/IntrinsicsGenerator.git")
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src ${TBB_TBB_INCLUDE_DIRS})
# Optional GPU build.
# When GPU_MODE is enabled, the C++ compiler is taken from the NVCPP environment
# variable (read at configure time); configuration stops if it is not defined.
if(GPU_MODE)
	message(STATUS "enable GPU mode : GPU_MODE = yes")
	if(DEFINED ENV{NVCPP})
		message(STATUS "Use nvc++ compiler at $ENV{NVCPP}")
		# NOTE(review): CMake expects CMAKE_CXX_COMPILER to be chosen before the
		# first project()/enable_language() call - confirm this block runs early enough.
		set(CMAKE_CXX_COMPILER $ENV{NVCPP})
	else()
		message(FATAL_ERROR "You have to specify an environment variable NVCPP which points to your nvc++ compiler")
	endif()
else()
	message(STATUS "GPU mode disabled : GPU_MODE = no")
endif()
add_subdirectory(src)
if(SELF_TESTS_MODE)
......
This diff is collapsed.
......@@ -29,7 +29,18 @@ https://gitlab.in2p3.fr/CTA-LAPP/PHOENIX_LIBS/PhoenixPerformance
$ make
$ make plot_all
# Performances with nvc++
With the environment variable **NVCPP** pointing to your nvc++ compiler.
$ git clone https://gitlab.in2p3.fr/CTA-LAPP/PHOENIX_LIBS/PhoenixPerformance.git
$ cd PhoenixPerformance
$ mkdir build
$ cd build
$ cmake .. -DGPU_MODE=yes -DCMAKE_CXX_COMPILER=$NVCPP
$ make
$ make plot_all
......@@ -36,7 +36,11 @@ function(phoenix_base_project programName programVersion programDescritpion prog
# --std=c++2a is to enable C++20
set(CMAKE_CXX_FLAGS "--std=c++2a -Wall -Werror -g -O0 -fprofile-arcs -ftest-coverage" PARENT_SCOPE)
else()
set(CMAKE_CXX_FLAGS "--std=c++2a -Wall -Werror -g -O2" PARENT_SCOPE)
if(GPU_MODE)
set(CMAKE_CXX_FLAGS "--std=c++17 -Wall -Werror -g -O2" PARENT_SCOPE)
else()
set(CMAKE_CXX_FLAGS "--std=c++2a -Wall -Werror -g -O2" PARENT_SCOPE)
endif()
endif()
add_definitions(-D__PROGRAM_VERSION__="${PROGRAM_VERSION}")
......
......@@ -6,5 +6,5 @@ NVCOMPILERS=/opt/nvidia/hpc_sdk; export NVCOMPILERS
MANPATH=$MANPATH:$NVCOMPILERS/$NVARCH/20.11/compilers/man; export MANPATH
PATH=$NVCOMPILERS/$NVARCH/20.11/compilers/bin:$PATH; export PATH
NVCPP=$NVCOMPILERS/$NVARCH/20.11/compilers/bin/nvc++; export NVCPP
project(Phoenix)
cmake_minimum_required(VERSION 2.8)
add_subdirectory(Sequential)
add_subdirectory(Parallel)
if(GPU_MODE)
add_subdirectory(NVCPP)
else()
add_subdirectory(Sequential)
add_subdirectory(Parallel)
endif()
# cmake_minimum_required() sets the policy defaults, so it is called before project().
cmake_minimum_required(VERSION 2.8)
project(Phoenix)

# Benchmarks built in this directory
add_subdirectory(hadamard)
# cmake_minimum_required() sets the policy defaults, so it is called before project().
cmake_minimum_required(VERSION 2.8)
project(Phoenix)

# List of table sizes handed to every benchmark below
set(CONFIG_GPU_HADAMARD "1000, 2000, 3000, 4000, 5000, 10000, 50000, 100000, 200000, 500000, 1000000, 5000000, 10000000")

# Sources shared by all the benchmark programs
set(progSrc hadamard.cpp main.cpp)

# One benchmark program per optimisation level
phoenix_compileAndRunExample(perf_hadamard_gpupar_O0 "-O0" "${CONFIG_GPU_HADAMARD}" ${progSrc})
phoenix_compileAndRunExample(perf_hadamard_gpupar_O1 "-O1" "${CONFIG_GPU_HADAMARD}" ${progSrc})
phoenix_compileAndRunExample(perf_hadamard_gpupar_O2 "-O2" "${CONFIG_GPU_HADAMARD}" ${progSrc})
phoenix_compileAndRunExample(perf_hadamard_gpupar_O3 "-O3" "${CONFIG_GPU_HADAMARD}" ${progSrc})
phoenix_compileAndRunExample(perf_hadamard_gpupar_O4 "-O4" "${CONFIG_GPU_HADAMARD}" ${progSrc})

# Same sources with AVX2 code generation enabled
phoenix_compileAndRunExample(perf_hadamard_gpupar_vectorize_O3 "-O3 -mavx2" "${CONFIG_GPU_HADAMARD}" ${progSrc})
phoenix_compileAndRunExample(perf_hadamard_gpupar_vectorize_O4 "-O4 -mavx2" "${CONFIG_GPU_HADAMARD}" ${progSrc})

# Flags for the stdpar GPU variants; only referenced by the disabled benchmarks below
set(GPU_EXTRA_FLAGS " -stdpar -gpu=cc35")

# Disabled stdpar GPU benchmarks (kept for reference)
# phoenix_compileAndRunExample(perf_hadamard_gpu_stdpar_par_vectorize_O3 "-O3 ${GPU_EXTRA_FLAGS} -mavx2" "${CONFIG_GPU_HADAMARD}" ${progSrc})
# phoenix_compileAndRunExample(perf_hadamard_gpu_stdpar_par_vectorize_O4 "-O4 ${GPU_EXTRA_FLAGS} -mavx2" "${CONFIG_GPU_HADAMARD}" ${progSrc})

# Plots which compare the benchmark programs declared above
phoenix_plotPerfLogX("hadamard_gpuparBase" perf_hadamard_gpupar_O0 perf_hadamard_gpupar_O1 perf_hadamard_gpupar_O2 perf_hadamard_gpupar_O3 perf_hadamard_gpupar_O4)
phoenix_plotPerfLogX("hadamard_gpuparParallelGPU" perf_hadamard_gpupar_O3 perf_hadamard_gpupar_vectorize_O3 perf_hadamard_gpupar_vectorize_O4
# 			perf_hadamard_gpu_stdpar_par_vectorize_O3 perf_hadamard_gpu_stdpar_par_vectorize_O4
)
/***************************************
Auteur : Pierre Aubert
Mail : aubertp7@gmail.com
Licence : CeCILL-C
****************************************/
//some doc at : https://en.cppreference.com/w/cpp/header/algorithm
#include <algorithm>
//Some doc at : https://en.cppreference.com/w/cpp/header/execution
#include <execution>
#include "hadamard.h"
///Element-wise (hadamard) product of two tables
/**	@param[out] tabRes : table which receives the products
 * 	@param tabX : table of x values
 * 	@param tabY : table of y values
 * 	@param nbElement : number of elements in each table
*/
void hadamard_product(float * tabRes, const float* tabX, const float* tabY, size_t nbElement){
	const float * endX = tabX + nbElement;
	//The binary operation applied to each pair (tabX[i], tabY[i])
	auto multiply = [](float lhs, float rhs){ return lhs * rhs; };
	std::transform(std::execution::par, tabX, endX, tabY, tabRes, multiply);
}
/***************************************
Auteur : Pierre Aubert
Mail : aubertp7@gmail.com
Licence : CeCILL-C
****************************************/
#ifndef __HADAMARD_PRODUCT_H__
#define __HADAMARD_PRODUCT_H__
#include <iostream>
///Element-wise (hadamard) product of two tables
/**	@param[out] tabRes : table of the result
 * 	@param tabX : table of x values
 * 	@param tabY : table of y values
 * 	@param nbElement : number of elements in the tables
*/
void hadamard_product(float * tabRes, const float* tabX, const float* tabY, size_t nbElement);
#endif
/***************************************
Auteur : Pierre Aubert
Mail : aubertp7@gmail.com
Licence : CeCILL-C
****************************************/
#include <iostream>
#include "micro_benchmark.h"
#include "PTensor.h"
#include "hadamard.h"
///Benchmark hadamard_product on tables of the given size
/**	@param nbElement : number of elements of the tables
 * 	The three tables are allocated with AllocMode::ALIGNED and the call is handed to micro_benchmarkAutoNsPrint
*/
void evaluateHadamardProduct(size_t nbElement){
	PTensor<float> tabX(AllocMode::ALIGNED, nbElement);
	PTensor<float> tabY(AllocMode::ALIGNED, nbElement);
	PTensor<float> tabRes(AllocMode::ALIGNED, nbElement);
	//Fill both inputs with small deterministic values (below 11 for x, below 19 for y)
	size_t index = 0lu;
	while(index < nbElement){
		tabX[index] = (index*19lu) % 11;
		tabY[index] = (index*27lu) % 19;
		++index;
	}
	micro_benchmarkAutoNsPrint("evaluate hadamard", nbElement, hadamard_product, tabRes.getData(), tabX.getData(), tabY.getData(), nbElement);
}
///Entry point : forwards the command line and the benchmark function to micro_benchmarkParseArg
/**	@param argc : number of arguments passed to the program
 * 	@param argv : table of arguments passed to the program
 * 	@return value returned by micro_benchmarkParseArg
*/
int main(int argc, char** argv){
	const int exitStatus = micro_benchmarkParseArg(argc, argv, evaluateHadamardProduct);
	return exitStatus;
}
# cmake_minimum_required() sets the policy defaults, so it is called before project().
cmake_minimum_required(VERSION 2.8)
project(Phoenix)

# Every .cpp file of this directory is compiled into the shared library.
# The glob is evaluated at configure time: re-run cmake after adding a source file.
file(GLOB mainSource "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")

add_library(gray_scott_intrinsics SHARED ${mainSource})

# -march=native / -mtune=native tie the generated code to the machine which builds it
set_property(TARGET gray_scott_intrinsics PROPERTY COMPILE_FLAGS "-O3 -ftree-vectorize -march=native -mtune=native -mavx2")

target_link_libraries(gray_scott_intrinsics TBB::tbb)
/***************************************
Auteur : Pierre Aubert
Mail : aubertp7@gmail.com
Licence : CeCILL-C
****************************************/
#include "phoenix_intrinsics.h"
#include <algorithm>
#include "intrinsics_propagation.h"
///Propagate the U and V species for one time step
/**	@param[out] outMatVecVecU : updated matrix U (nbRow x nbCol values, row-major)
 * 	@param[out] outMatVecV : updated matrix V (nbRow x nbCol values, row-major)
 * 	@param matVecVecU : input matrix U (nbRow x nbCol values, row-major)
 * 	@param matVecV : input matrix V (nbRow x nbCol values, row-major)
 * 	@param nbRow : number of rows of the matrices
 * 	@param nbCol : number of columns of the matrices
 * 	@param padding : padding of the columns of all matrices (not used by this implementation)
 * 	@param matBroadcastDeltaSquare : matrix of the delta square values (stencil weights, row-major)
 * 	@param nbStencilRow : number of rows of the matrix matBroadcastDeltaSquare
 * 	@param nbStencilCol : number of columns of the matrix matBroadcastDeltaSquare
 * 	@param diffudionRateU : diffusion rate of the U specie
 * 	@param diffusionRateV : diffusion rate of the V specie
 * 	@param feedRate : rate of the process which feeds U and drains U, V and P
 * 	@param killRate : rate of the process which converts V into P
 * 	@param dt : time interval between two steps
 * 
 * 	For each cell (i, j) the weighted differences with its neighbours are accumulated, then :
 * 		du = diffudionRateU*fullU - u*v*v + feedRate*(1 - u)
 * 		dv = diffusionRateV*fullV + u*v*v - (feedRate + killRate)*v
 * 	and the outputs receive u + du*dt and v + dv*dt
*/
void grayscott_propagation(float * outMatVecVecU, float * outMatVecV, const float * matVecVecU, const float * matVecV, long nbRow, long nbCol, long padding,
		const float * matBroadcastDeltaSquare, long nbStencilRow, long nbStencilCol,
		float diffudionRateU, float diffusionRateV, float feedRate, float killRate, float dt)
{
	//The padding is part of the interface but this scalar implementation addresses the matrices with nbCol only
	(void)padding;
	//Half extent of the stencil around the current cell
	long offsetStencilRow((nbStencilRow - 1l)/2l);
	long offsetStencilCol((nbStencilCol - 1l)/2l);
	for(long i(0l); i < nbRow; ++i){
		//Rows of the neighbourhood, clamped to the matrix
		long firstRowStencil(std::max(i - offsetStencilRow, 0l));
		long lastRowStencil(std::min(i + offsetStencilRow + 1l, nbRow));
		long rowOffset(i*nbCol);
		for(long j(0l); j < nbCol; ++j){
			//Columns of the neighbourhood, clamped to the matrix
			long firstColStencil(std::max(j - offsetStencilCol, 0l));
			long lastColStencil(std::min(j + offsetStencilCol + 1l, nbCol));
			//NOTE(review): the stencil indices restart at 0 even when the window is clamped
			//at the top/left border, so the weights are not centred on (i, j) there - confirm this is intended
			long stencilIndexRow(0l);
			float u(matVecVecU[rowOffset + j]), v(matVecV[rowOffset + j]);
			float fullU(0.0f), fullV(0.0f);
			for(long k(firstRowStencil); k < lastRowStencil; ++k){
				long stencilIndexCol(0l);
				for(long l(firstColStencil); l < lastColStencil; ++l){
					float deltaSquare(matBroadcastDeltaSquare[stencilIndexRow*nbStencilCol + stencilIndexCol]);
					fullU += (matVecVecU[k*nbCol + l] - u)*deltaSquare;
					fullV += (matVecV[k*nbCol + l] - v)*deltaSquare;
					++stencilIndexCol;
				}
				++stencilIndexRow;
			}
			//Reaction term shared by both species
			float uvSquare(u*v*v);
			float du(diffudionRateU*fullU - uvSquare + feedRate*(1.0f - u));
			float dv(diffusionRateV*fullV + uvSquare - (feedRate + killRate)*v);
			outMatVecVecU[rowOffset + j] = u + du*dt;
			outMatVecV[rowOffset + j] = v + dv*dt;
		}
	}
}
/***************************************
Auteur : Pierre Aubert
Mail : aubertp7@gmail.com
Licence : CeCILL-C
****************************************/
#ifndef __INTRINSICS_PROPAGATION_H__
#define __INTRINSICS_PROPAGATION_H__
#include <iostream>
///Propagate the U and V species for one time step
/**	@param[out] outMatU : updated matrix U
 * 	@param[out] outMatV : updated matrix V
 * 	@param matU : input matrix U
 * 	@param matV : input matrix V
 * 	@param nbRow : number of rows of the matrices
 * 	@param nbCol : number of columns of the matrices
 * 	@param padding : padding of the columns of all matrices
 * 	@param matDeltaSquare : matrix of the delta square values
 * 	@param nbStencilRow : number of rows of the matrix matDeltaSquare
 * 	@param nbStencilCol : number of columns of the matrix matDeltaSquare
 * 	@param diffudionRateU : diffusion rate of the U specie
 * 	@param diffusionRateV : diffusion rate of the V specie
 * 	@param feedRate : feed rate used in the update of U and V
 * 	@param killRate : kill rate used in the update of V
 * 	@param dt : time interval between two steps
*/
void grayscott_propagation(float * outMatU, float * outMatV, const float * matU, const float * matV, long nbRow, long nbCol, long padding,
const float * matDeltaSquare, long nbStencilRow, long nbStencilCol,
float diffudionRateU, float diffusionRateV, float feedRate, float killRate, float dt);
#endif
......@@ -46,7 +46,6 @@ void grayscott_propagation(float * outMatU, float * outMatV, const float * matU,
long stencilIndexCol(0l);
for(long l(firstColStencil); l < lastColStencil; ++l){
float deltaSquare(matDeltaSquare[stencilIndexRow*nbStencilCol + stencilIndexCol]);
// float deltaSquare(1.0f);
fullU += (matU[k*nbCol + l] - u)*deltaSquare;
fullV += (matV[k*nbCol + l] - v)*deltaSquare;
++stencilIndexCol;
......@@ -54,8 +53,8 @@ void grayscott_propagation(float * outMatU, float * outMatV, const float * matU,
++stencilIndexRow;
}
float uvSquare(u*v*v);
float du(diffudionRateU*fullU/**u*/ - uvSquare + feedRate*(1.0f - u));
float dv(diffusionRateV*fullV/**v*/ + uvSquare - (feedRate + killRate)*v);
float du(diffudionRateU*fullU - uvSquare + feedRate*(1.0f - u));
float dv(diffusionRateV*fullV + uvSquare - (feedRate + killRate)*v);
outMatU[i*nbCol + j] = u + du*dt;
outMatV[i*nbCol + j] = v + dv*dt;
......
......@@ -60,8 +60,8 @@ void grayscott_propagation(float * __restrict__ poutMatU, float * __restrict__ p
++stencilIndexRow;
}
float uvSquare(u*v*v);
float du(diffudionRateU*fullU/**u*/ - uvSquare + feedRate*(1.0f - u));
float dv(diffusionRateV*fullV/**v*/ + uvSquare - (feedRate + killRate)*v);
float du(diffudionRateU*fullU - uvSquare + feedRate*(1.0f - u));
float dv(diffusionRateV*fullV + uvSquare - (feedRate + killRate)*v);
outMatU[i*nbCol + j] = u + du*dt;
outMatV[i*nbCol + j] = v + dv*dt;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment