Commit 2abeacc7 authored by Pierre Aubert's avatar Pierre Aubert
Browse files

Add performance tests to check the performance of the generated proxy library

parent 26ad01c0
......@@ -19,6 +19,7 @@ add_subdirectory(src)
# The test tree is only configured when SELF_TESTS_MODE is set
if(SELF_TESTS_MODE)
# CTest module
include(CTest)
# MicroBenchmark is pulled before the TESTS directory is added
# (the performance tests include micro_benchmark.h, presumably provided by this module - TODO confirm)
pull_extra_module("MicroBenchmark" "https://gitlab.in2p3.fr/CTA-LAPP/PHOENIX_LIBS/MicroBenchmark.git")
add_subdirectory(TESTS)
endif(SELF_TESTS_MODE)
......
......@@ -3,4 +3,5 @@ cmake_minimum_required(VERSION 2.8)
# Test directories configured from this list
add_subdirectory(TEST_BASE_HEADER)
add_subdirectory(TEST_PROXY_LIB)
# Performance comparison between the direct and the proxy versions of the Hadamard product
add_subdirectory(PERFORMANCE_TESTS)
# The policy defaults have to be set before project() is evaluated
cmake_minimum_required(VERSION 2.8)
project(Phoenix)

# Shared library which provides custom_aligned_malloc and custom_aligned_free to the performance tests
add_library(allocation_library SHARED custom_malloc.cpp)
/***************************************
Auteur : Pierre Aubert
Mail : aubertp7@gmail.com
Licence : CeCILL-C
****************************************/
#ifndef __APPLE__
# include <malloc.h>
#else
# include <stdlib.h>
#endif
#include "custom_malloc.h"
#define VECTOR_ALIGNEMENT 64lu
#ifdef __APPLE__
///Allocate an aligned vector (only compiled when __APPLE__ is defined, where stdlib.h is included instead of malloc.h)
/**	@param alignementInBytes : alignment of the vector we want to allocate
 * 	@param sizeOfVectorInBytes : size of the vector we want to allocate
 * 	@return aligned pointer to the vector, or NULL if posix_memalign reports an error
*/
void * memalign(long unsigned int alignementInBytes, long unsigned int sizeOfVectorInBytes){
	void * ptr = NULL;
	//posix_memalign returns a non zero error code on failure, the content of ptr is not relied on in that case
	if(posix_memalign(&ptr, alignementInBytes, sizeOfVectorInBytes) != 0){
		return NULL;
	}
	return ptr;
}
#endif
///Allocate a block of memory aligned on VECTOR_ALIGNEMENT bytes
/**	@param sizeOfVectorInBytes : number of bytes to be allocated
 * 	@return pointer returned by memalign for the requested size
*/
void * custom_aligned_malloc(long unsigned int sizeOfVectorInBytes){
	void * alignedPtr = memalign(VECTOR_ALIGNEMENT, sizeOfVectorInBytes);
	return alignedPtr;
}
///Release a pointer with free
/**	@param ptr : pointer to be released
*/
void custom_aligned_free(void* ptr){
	//free does nothing on a NULL pointer, so leaving early is equivalent
	if(ptr == NULL){
		return;
	}
	free(ptr);
}
/***************************************
Auteur : Pierre Aubert
Mail : aubertp7@gmail.com
Licence : CeCILL-C
****************************************/
//Include guard of the aligned allocation helpers
#ifndef __CUSTOM_MALLOC_H__
#define __CUSTOM_MALLOC_H__
///Allocate an aligned block of sizeOfVectorInBytes bytes, the result has to be released with custom_aligned_free
void * custom_aligned_malloc(long unsigned int sizeOfVectorInBytes);
///Release a pointer obtained from custom_aligned_malloc
void custom_aligned_free(void* ptr);
#endif
# The policy defaults have to be set before project() is evaluated
cmake_minimum_required(VERSION 2.8)
project(Phoenix)

# Remove -O3 from the definitions of this directory
# (presumably because each benchmark passes its own optimisation flags - TODO confirm)
remove_definitions(-O3)

# Directory of custom_malloc.h, which is included by the benchmark programs
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/AllocatationLibrary)

# Argument string given to every phoenix_compileAndRunExample call of the subdirectories
# (presumably the list of table sizes to be evaluated - TODO confirm)
set(CONFIG_HADAMARD_PRODUCT "1000, 2000, 3000, 4000, 5000, 10000")

add_subdirectory(AllocatationLibrary)
add_subdirectory(PERF_HADAMARD)
add_subdirectory(PERF_HADAMARD_PROXY)

# Plots which compare the direct version and the proxy version of the benchmark
phoenix_plotPerf("cmpHadamardProxyBase"
	perf_hadamard_product_O0
	perf_hadamard_product_O2
	perf_hadamard_product_O3
	perf_proxy_hadamard_product_O0
	perf_proxy_hadamard_product_O2
	perf_proxy_hadamard_product_O3
)
phoenix_plotPerf("cmpHadamardProxyVectorize"
	perf_hadamard_product_O3
	perf_hadamard_product_vectorize_O3
	perf_proxy_hadamard_product_O3
	perf_proxy_hadamard_product_vectorize_O3
)
# The policy defaults have to be set before project() is evaluated
cmake_minimum_required(VERSION 3.0)
project(Phoenix)

# Set before the phoenix_compileAndRunExample calls (presumably read by them to link the examples - TODO confirm)
set(EXTRA_DEPENDENCIES allocation_library)
set(progSrc hadamard_product.cpp main.cpp)

# One benchmark per plain optimisation level : perf_hadamard_product_<level> compiled with -<level>
foreach(optLevel O0 O1 O2 O3 Ofast)
	phoenix_compileAndRunExample(perf_hadamard_product_${optLevel} "-${optLevel}" "${CONFIG_HADAMARD_PRODUCT}" ${progSrc})
endforeach()

# NOTE(review): -march=native and -mavx2 tie these binaries to the build host - confirm this is intended
set(vectorizeFlags "-ftree-vectorize -march=native -mtune=native -mavx2")
phoenix_compileAndRunExample(perf_hadamard_product_vectorize_O3 "-O3 ${vectorizeFlags}" "${CONFIG_HADAMARD_PRODUCT}" ${progSrc})
phoenix_compileAndRunExample(perf_hadamard_product_vectorize_Ofast "-Ofast ${vectorizeFlags}" "${CONFIG_HADAMARD_PRODUCT}" ${progSrc})

phoenix_plotPerf("hadamardBase"
	perf_hadamard_product_O0
	perf_hadamard_product_O1
	perf_hadamard_product_O2
	perf_hadamard_product_O3
	perf_hadamard_product_Ofast
)
phoenix_plotPerf("hadamardVectorize"
	perf_hadamard_product_O3
	perf_hadamard_product_vectorize_O3
	perf_hadamard_product_vectorize_Ofast
)
/***************************************
Auteur : Pierre Aubert
Mail : aubertp7@gmail.com
Licence : CeCILL-C
****************************************/
#include "hadamard_product.h"
//Alignment in bytes which is promised to the compiler for the three tables
#define FLOAT_VECTOR_ALIGNEMENT 64

///Compute the element-wise product of two tables
/**	@param[out] ptabResult : table which receives ptabX[i]*ptabY[i] for each index i
 * 	@param ptabX : first input table
 * 	@param ptabY : second input table
 * 	@param nbElement : number of elements in the tables
 * 	The three pointers are declared to the compiler as aligned on FLOAT_VECTOR_ALIGNEMENT bytes
*/
void hadamard_product(float* __restrict__ ptabResult, const float* __restrict__ ptabX, const float* __restrict__ ptabY, long unsigned int nbElement){
	float* alignedResult = static_cast<float*>(__builtin_assume_aligned(ptabResult, FLOAT_VECTOR_ALIGNEMENT));
	const float* alignedX = static_cast<const float*>(__builtin_assume_aligned(ptabX, FLOAT_VECTOR_ALIGNEMENT));
	const float* alignedY = static_cast<const float*>(__builtin_assume_aligned(ptabY, FLOAT_VECTOR_ALIGNEMENT));
	for(long unsigned int idx = 0lu; idx < nbElement; ++idx){
		alignedResult[idx] = alignedX[idx]*alignedY[idx];
	}
}
/***************************************
Auteur : Pierre Aubert
Mail : aubertp7@gmail.com
Licence : CeCILL-C
****************************************/
//Include guard of the Hadamard product declaration
#ifndef __HADAMARD_PRODUCT_H__
#define __HADAMARD_PRODUCT_H__
///Element-wise product : ptabResult receives ptabX*ptabY for nbElement elements (the __restrict__ tables must not overlap)
void hadamard_product(float* __restrict__ ptabResult, const float* __restrict__ ptabX, const float* __restrict__ ptabY, long unsigned int nbElement);
#endif
/***************************************
Auteur : Pierre Aubert
Mail : aubertp7@gmail.com
Licence : CeCILL-C
****************************************/
#include <iostream>
#include "micro_benchmark.h"
#include "custom_malloc.h"
#include "hadamard_product.h"
///Run the benchmark of hadamard_product on tables of nbElement elements
/**	@param nbElement : number of elements of the tables
 * 	Three tables are allocated with custom_aligned_malloc and the two inputs are filled with small deterministic values.
 * 	Then hadamard_product and its arguments are given to micro_benchmarkAutoNsPrint (presumably prints the time per element - TODO confirm).
 * 	If one allocation fails, an error is printed on the standard error and nothing is measured.
*/
void evaluateHadamardProduct(size_t nbElement){
	float * tabX = (float*)custom_aligned_malloc(sizeof(float)*nbElement);
	float * tabY = (float*)custom_aligned_malloc(sizeof(float)*nbElement);
	float * tabRes = (float*)custom_aligned_malloc(sizeof(float)*nbElement);
	//The tables are written just below, so a failed allocation must stop here
	if(!tabX || !tabY || !tabRes){
		std::cerr << "evaluateHadamardProduct : cannot allocate 3 tables of " << nbElement << " floats" << std::endl;
		custom_aligned_free(tabRes);
		custom_aligned_free(tabY);
		custom_aligned_free(tabX);
		return;
	}
	for(size_t i(0lu); i < nbElement; ++i){
		//Values in [0, 10] and [0, 18], the conversion to float is exact
		tabX[i] = (float)(i*19lu%11lu);
		tabY[i] = (float)(i*27lu%19lu);
	}
	micro_benchmarkAutoNsPrint("evaluate hadamard product", nbElement, hadamard_product, tabRes, tabX, tabY, nbElement);
	custom_aligned_free(tabRes);
	custom_aligned_free(tabY);
	custom_aligned_free(tabX);
}
///Entry point of the benchmark program
/**	@param argc : number of arguments passed to the program
 * 	@param argv : table of arguments passed to the program
 * 	@return value returned by micro_benchmarkParseArg, which receives evaluateHadamardProduct as third argument
*/
int main(int argc, char** argv){
	const int exitStatus = micro_benchmarkParseArg(argc, argv, evaluateHadamardProduct);
	return exitStatus;
}
# The policy defaults have to be set before project() is evaluated
cmake_minimum_required(VERSION 2.8)
project(Phoenix)

# Build of the proxy library, its generated headers are in the corresponding binary directory
add_subdirectory(HadamardProductProxy)
include_directories(${CMAKE_CURRENT_BINARY_DIR}/HadamardProductProxy)

# CMAKE_DL_LIBS is the name of the library which provides dlopen on the current platform (dl on Linux)
set(EXTRA_DEPENDENCIES hadamard_product_perf_proxy allocation_library ${CMAKE_DL_LIBS})
set(progSrc main.cpp)

# One benchmark per plain optimisation level : perf_proxy_hadamard_product_<level> compiled with -<level>
foreach(optLevel O0 O1 O2 O3 Ofast)
	phoenix_compileAndRunExample(perf_proxy_hadamard_product_${optLevel} "-${optLevel}" "${CONFIG_HADAMARD_PRODUCT}" ${progSrc})
endforeach()

# NOTE(review): -march=native and -mavx2 tie these binaries to the build host - confirm this is intended
set(vectorizeFlags "-ftree-vectorize -march=native -mtune=native -mavx2")
phoenix_compileAndRunExample(perf_proxy_hadamard_product_vectorize_O3 "-O3 ${vectorizeFlags}" "${CONFIG_HADAMARD_PRODUCT}" ${progSrc})
phoenix_compileAndRunExample(perf_proxy_hadamard_product_vectorize_Ofast "-Ofast ${vectorizeFlags}" "${CONFIG_HADAMARD_PRODUCT}" ${progSrc})

phoenix_plotPerf("hadamardProxyBase"
	perf_proxy_hadamard_product_O0
	perf_proxy_hadamard_product_O1
	perf_proxy_hadamard_product_O2
	perf_proxy_hadamard_product_O3
	perf_proxy_hadamard_product_Ofast
)
phoenix_plotPerf("hadamardProxyVectorize"
	perf_proxy_hadamard_product_O3
	perf_proxy_hadamard_product_vectorize_O3
	perf_proxy_hadamard_product_vectorize_Ofast
)
# The policy defaults have to be set before project() is evaluated
cmake_minimum_required(VERSION 2.8)
project(Phoenix)

# Let's compile the hadamard product several times to cover architectures from sse2 to avx512f (in -O3 but could be in -O2)
set(HADAMARD_PRODUCT_BASE_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/hadamard_product.cpp)

# Add the shared library hadamard_product_perf_<variantName> built from HADAMARD_PRODUCT_BASE_SOURCE
#   variantName  : suffix of the target name
#   variantFlags : value of the COMPILE_FLAGS property of the target
function(phoenix_addHadamardPerfVariant variantName variantFlags)
	add_library(hadamard_product_perf_${variantName} SHARED ${HADAMARD_PRODUCT_BASE_SOURCE})
	set_property(TARGET hadamard_product_perf_${variantName} PROPERTY COMPILE_FLAGS "${variantFlags}")
endfunction()

# NOTE(review): -march=native already enables every extension of the build host, so the variants
# below may not differ as much as their names suggest - confirm this is intended
set(vectorizeFlags "-O3 -ftree-vectorize -march=native -mtune=native")
phoenix_addHadamardPerfVariant(anyarch "-O3 -DFLOAT_VECTOR_ALIGNEMENT=16")
phoenix_addHadamardPerfVariant(sse2 "${vectorizeFlags} -msse2 -DFLOAT_VECTOR_ALIGNEMENT=16")
phoenix_addHadamardPerfVariant(ssse3 "${vectorizeFlags} -mssse3 -DFLOAT_VECTOR_ALIGNEMENT=16")
phoenix_addHadamardPerfVariant(sse4 "${vectorizeFlags} -msse4 -DFLOAT_VECTOR_ALIGNEMENT=16")
phoenix_addHadamardPerfVariant(avx "${vectorizeFlags} -mavx -DFLOAT_VECTOR_ALIGNEMENT=32")
phoenix_addHadamardPerfVariant(avx2 "${vectorizeFlags} -mavx2 -DFLOAT_VECTOR_ALIGNEMENT=32")
phoenix_addHadamardPerfVariant(avx512f "${vectorizeFlags} -mavx512f -DFLOAT_VECTOR_ALIGNEMENT=64")

set(HADAMARD_PRODUCT_LIBS
	hadamard_product_perf_anyarch
	hadamard_product_perf_sse2
	hadamard_product_perf_ssse3
	hadamard_product_perf_sse4
	hadamard_product_perf_avx
	hadamard_product_perf_avx2
	hadamard_product_perf_avx512f
)

# Sources of the proxy library, written by phoenix_hpc_proxy in the binary directory
set(OUTPUT_PROXY_SOURCES
	${CMAKE_CURRENT_BINARY_DIR}/hadamard_product.cpp
	${CMAKE_CURRENT_BINARY_DIR}/Hadamard_productProxyLoader.cpp
)
set_source_files_properties(${OUTPUT_PROXY_SOURCES} PROPERTIES GENERATED yes)

# Every generated file is an output of the rule : the headers and the sources
# (the list used to reference itself, which left the generated sources out of the outputs)
set(OUTPUT_PROXY_FILES
	${CMAKE_CURRENT_BINARY_DIR}/hadamard_product.h
	${CMAKE_CURRENT_BINARY_DIR}/Hadamard_productProxyLoader.h
	${OUTPUT_PROXY_SOURCES}
)

# $<TARGET_FILE:...> gives the real location of each library, whatever the platform suffix or the output directory
add_custom_command(OUTPUT ${OUTPUT_PROXY_FILES}
	COMMAND ${CMAKE_BINARY_DIR}/src/phoenix_hpc_proxy
		-i ${CMAKE_CURRENT_SOURCE_DIR}/hadamard_product.h
		-l hadamard_product
		--anyarch=$<TARGET_FILE:hadamard_product_perf_anyarch>
		--sse2=$<TARGET_FILE:hadamard_product_perf_sse2>
		--ssse3=$<TARGET_FILE:hadamard_product_perf_ssse3>
		--sse4.2=$<TARGET_FILE:hadamard_product_perf_sse4>
		--avx=$<TARGET_FILE:hadamard_product_perf_avx>
		--avx2=$<TARGET_FILE:hadamard_product_perf_avx2>
		--avx512f=$<TARGET_FILE:hadamard_product_perf_avx512f>
		--libdir=${CMAKE_CURRENT_BINARY_DIR}
	WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
	# The input header is a dependency too, otherwise the proxy is not generated again when it changes
	DEPENDS phoenix_hpc_proxy ${HADAMARD_PRODUCT_LIBS} ${CMAKE_CURRENT_SOURCE_DIR}/hadamard_product.h
	COMMENT "Create the proxy hadamard product performance"
	VERBATIM
)

add_custom_target(create_hadamard_product_perf_proxy ALL DEPENDS ${OUTPUT_PROXY_FILES})

# The proxy library is built from the generated sources, once the generation target is done
add_library(hadamard_product_perf_proxy SHARED ${OUTPUT_PROXY_SOURCES})
add_dependencies(hadamard_product_perf_proxy create_hadamard_product_perf_proxy)
/***************************************
Auteur : Pierre Aubert
Mail : aubertp7@gmail.com
Licence : CeCILL-C
****************************************/
#include "hadamard_product.h"
///Compute the element-wise product of two tables
/**	@param[out] ptabResult : table which receives ptabX[i]*ptabY[i] for each index i
 * 	@param ptabX : first input table
 * 	@param ptabY : second input table
 * 	@param nbElement : number of elements in the tables
 * 	FLOAT_VECTOR_ALIGNEMENT is not defined in this file, it has to be given by the compilation flags
*/
void hadamard_product(float* __restrict__ ptabResult, const float* __restrict__ ptabX, const float* __restrict__ ptabY, long unsigned int nbElement){
	float* alignedResult = static_cast<float*>(__builtin_assume_aligned(ptabResult, FLOAT_VECTOR_ALIGNEMENT));
	const float* alignedX = static_cast<const float*>(__builtin_assume_aligned(ptabX, FLOAT_VECTOR_ALIGNEMENT));
	const float* alignedY = static_cast<const float*>(__builtin_assume_aligned(ptabY, FLOAT_VECTOR_ALIGNEMENT));
	for(long unsigned int idx = 0lu; idx < nbElement; ++idx){
		alignedResult[idx] = alignedX[idx]*alignedY[idx];
	}
}
/***************************************
Auteur : Pierre Aubert
Mail : aubertp7@gmail.com
Licence : CeCILL-C
****************************************/
//Include guard of the Hadamard product declaration
#ifndef __HADAMARD_PRODUCT_H__
#define __HADAMARD_PRODUCT_H__
///Element-wise product : ptabResult receives ptabX*ptabY for nbElement elements (the __restrict__ tables must not overlap)
void hadamard_product(float* __restrict__ ptabResult, const float* __restrict__ ptabX, const float* __restrict__ ptabY, long unsigned int nbElement);
#endif
/***************************************
Auteur : Pierre Aubert
Mail : aubertp7@gmail.com
Licence : CeCILL-C
****************************************/
#include <iostream>
#include "micro_benchmark.h"
#include "custom_malloc.h"
#include "hadamard_product.h"
///Run the benchmark of hadamard_product on tables of nbElement elements
/**	@param nbElement : number of elements of the tables
 * 	Three tables are allocated with custom_aligned_malloc and the two inputs are filled with small deterministic values.
 * 	Then hadamard_product and its arguments are given to micro_benchmarkAutoNsPrint (presumably prints the time per element - TODO confirm).
 * 	If one allocation fails, an error is printed on the standard error and nothing is measured.
*/
void evaluateHadamardProduct(size_t nbElement){
	float * tabX = (float*)custom_aligned_malloc(sizeof(float)*nbElement);
	float * tabY = (float*)custom_aligned_malloc(sizeof(float)*nbElement);
	float * tabRes = (float*)custom_aligned_malloc(sizeof(float)*nbElement);
	//The tables are written just below, so a failed allocation must stop here
	if(!tabX || !tabY || !tabRes){
		std::cerr << "evaluateHadamardProduct : cannot allocate 3 tables of " << nbElement << " floats" << std::endl;
		custom_aligned_free(tabRes);
		custom_aligned_free(tabY);
		custom_aligned_free(tabX);
		return;
	}
	for(size_t i(0lu); i < nbElement; ++i){
		//Values in [0, 10] and [0, 18], the conversion to float is exact
		tabX[i] = (float)(i*19lu%11lu);
		tabY[i] = (float)(i*27lu%19lu);
	}
	micro_benchmarkAutoNsPrint("evaluate hadamard product", nbElement, hadamard_product, tabRes, tabX, tabY, nbElement);
	custom_aligned_free(tabRes);
	custom_aligned_free(tabY);
	custom_aligned_free(tabX);
}
///Entry point of the proxy benchmark program
/**	@param argc : number of arguments passed to the program
 * 	@param argv : table of arguments passed to the program
 * 	@return value returned by micro_benchmarkParseArg, which receives evaluateHadamardProduct as third argument
*/
int main(int argc, char** argv){
	const int exitStatus = micro_benchmarkParseArg(argc, argv, evaluateHadamardProduct);
	return exitStatus;
}
# Comparison of the direct and proxy Hadamard product benchmarks at O0, O2 and O3
set terminal png notransparent crop enhanced size 800,600 font "arial,14"
set grid xtics ytics mytics
set key out vert center top
set logscale y

# First figure : column 2 versus column 1, the error bars are column 2 -/+ column 4
set xlabel "nb elements"
set ylabel "elapsed time per element [ns/el]"
set output "cmpHadamardProxyBaseElapsedTimeCyEl.png"
plot "perf_hadamard_product_O0.txt" using 1:2:($2-$4):($2+$4) title "perf hadamard product O0" with yerrorlines lw 2, \
     "perf_hadamard_product_O2.txt" using 1:2:($2-$4):($2+$4) title "perf hadamard product O2" with yerrorlines lw 2, \
     "perf_hadamard_product_O3.txt" using 1:2:($2-$4):($2+$4) title "perf hadamard product O3" with yerrorlines lw 2, \
     "perf_proxy_hadamard_product_O0.txt" using 1:2:($2-$4):($2+$4) title "perf proxy hadamard product O0" with yerrorlines lw 2, \
     "perf_proxy_hadamard_product_O2.txt" using 1:2:($2-$4):($2+$4) title "perf proxy hadamard product O2" with yerrorlines lw 2, \
     "perf_proxy_hadamard_product_O3.txt" using 1:2:($2-$4):($2+$4) title "perf proxy hadamard product O3" with yerrorlines lw 2

# Second figure : column 3 versus column 1, the error bars are column 3 -/+ column 5
# (the plot commands no longer end with a comma, which older gnuplot versions may reject)
set xlabel "nb elements"
set ylabel "elapsed time [ns]"
set output "cmpHadamardProxyBaseElapsedTime.png"
plot "perf_hadamard_product_O0.txt" using 1:3:($3-$5):($3+$5) title "perf hadamard product O0" with yerrorlines lw 2, \
     "perf_hadamard_product_O2.txt" using 1:3:($3-$5):($3+$5) title "perf hadamard product O2" with yerrorlines lw 2, \
     "perf_hadamard_product_O3.txt" using 1:3:($3-$5):($3+$5) title "perf hadamard product O3" with yerrorlines lw 2, \
     "perf_proxy_hadamard_product_O0.txt" using 1:3:($3-$5):($3+$5) title "perf proxy hadamard product O0" with yerrorlines lw 2, \
     "perf_proxy_hadamard_product_O2.txt" using 1:3:($3-$5):($3+$5) title "perf proxy hadamard product O2" with yerrorlines lw 2, \
     "perf_proxy_hadamard_product_O3.txt" using 1:3:($3-$5):($3+$5) title "perf proxy hadamard product O3" with yerrorlines lw 2
# Comparison of the direct and proxy Hadamard product benchmarks at O3, with and without the vectorization flags
set terminal png notransparent crop enhanced size 800,600 font "arial,14"
set grid xtics ytics mytics
set key out vert center top
set logscale y

# First figure : column 2 versus column 1, the error bars are column 2 -/+ column 4
set xlabel "nb elements"
set ylabel "elapsed time per element [ns/el]"
set output "cmpHadamardProxyVectorizeElapsedTimeCyEl.png"
plot "perf_hadamard_product_O3.txt" using 1:2:($2-$4):($2+$4) title "perf hadamard product O3" with yerrorlines lw 2, \
     "perf_hadamard_product_vectorize_O3.txt" using 1:2:($2-$4):($2+$4) title "perf hadamard product vectorize O3" with yerrorlines lw 2, \
     "perf_proxy_hadamard_product_O3.txt" using 1:2:($2-$4):($2+$4) title "perf proxy hadamard product O3" with yerrorlines lw 2, \
     "perf_proxy_hadamard_product_vectorize_O3.txt" using 1:2:($2-$4):($2+$4) title "perf proxy hadamard product vectorize O3" with yerrorlines lw 2

# Second figure : column 3 versus column 1, the error bars are column 3 -/+ column 5
# (the plot commands no longer end with a comma, which older gnuplot versions may reject)
set xlabel "nb elements"
set ylabel "elapsed time [ns]"
set output "cmpHadamardProxyVectorizeElapsedTime.png"
plot "perf_hadamard_product_O3.txt" using 1:3:($3-$5):($3+$5) title "perf hadamard product O3" with yerrorlines lw 2, \
     "perf_hadamard_product_vectorize_O3.txt" using 1:3:($3-$5):($3+$5) title "perf hadamard product vectorize O3" with yerrorlines lw 2, \
     "perf_proxy_hadamard_product_O3.txt" using 1:3:($3-$5):($3+$5) title "perf proxy hadamard product O3" with yerrorlines lw 2, \
     "perf_proxy_hadamard_product_vectorize_O3.txt" using 1:3:($3-$5):($3+$5) title "perf proxy hadamard product vectorize O3" with yerrorlines lw 2
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment