Skip to content
Snippets Groups Projects
Commit 49fc30d0 authored by CHAMONT David's avatar CHAMONT David
Browse files

Ajout d'une variante regroupant explicitement les iterations en inter au GPU.

parent f036c59e
No related branches found
No related tags found
No related merge requests found
Showing
with 442 additions and 4 deletions
File moved
File moved
File moved
File moved
set(SOURCE_FILE sycl-gray-scott.cpp)
set(TARGET_NAME sycl-gray-scott.exe)
set(SOURCE_FILE main.cpp)
set(TARGET_NAME main.exe)
set(COMPILE_FLAGS "-fsycl -Wall")
set(LINK_FLAGS "-fsycl")
......
......@@ -169,8 +169,7 @@ int main( int argc, char * argv[] ) {
}
catch (sycl::exception & e) {
std::cout << e.what() << std::endl;
std::cout << e.category() << std::endl;
std::cout << e.code() << std::endl;
std::cout << e.code().message() << std::endl;
}
catch (std::exception & e) {
std::cout << e.what() << std::endl;
......
# Top-level CMake configuration for the SyclSquare project.
# Direct CMake to use icpx rather than the default C++ compiler/linker
# (set before project() so that it is taken into account when the language is enabled).
set(CMAKE_CXX_COMPILER icpx)
cmake_minimum_required (VERSION 3.4)
project(SyclSquare CXX)
# Place archives, libraries and executables directly in the top of the build tree.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
# Process the CMakeLists.txt found in src/ (presumably where the targets are declared).
add_subdirectory (src)
#!/bin/bash
# Build all targets inside the build/ directory.
# Stop immediately if build/ does not exist, so that make is never run
# from the wrong directory.
cd build || exit 1
make all
#!/bin/bash
# Recreate an empty build/ directory and run the CMake configuration in it.
rm -rf build
mkdir -p build
# Stop if build/ could not be entered: "cmake .." would otherwise be run
# against the parent of the current directory.
cd build || exit 1
cmake ..
#!/bin/bash
# Time one run of the program.
# Arguments of main.exe, in order: nb_rows nb_cols nb_images nb_iterations.
time ./build/main.exe 270 480 5 10000
# Alternative grid sizes, disabled by default.
#time ./build/main.exe 540 960 5 10000
#time ./build/main.exe 1080 1920 5 10000
# NOTE(review): the next line names sycl-gray-scott.exe while the other lines
# use main.exe; confirm the executable name before enabling it.
#time ./build/sycl-gray-scott.exe 2160 3840 5 1000
# Declares the single executable target, built from one source file.
set(SOURCE_FILE main.cpp)
set(TARGET_NAME main.exe)
# -fsycl is passed both when compiling and when linking.
set(COMPILE_FLAGS "-fsycl -Wall")
set(LINK_FLAGS "-fsycl")
add_executable(${TARGET_NAME} ${SOURCE_FILE})
set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS}")
set_target_properties(${TARGET_NAME} PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
#add_custom_target(all DEPENDS ${TARGET_NAME})
#include <CL/sycl.hpp>
#include <array>
#include <iostream>
#include <iomanip>
#include <cmath>
using namespace cl::sycl;
// Parameters of the update computed by the kernel in submit().
// KILL_RATE: together with FEED_RATE, scales the removal term of v.
constexpr float KILL_RATE { 0.062f };
// FEED_RATE: scales the (1 - u) source term of u and, with KILL_RATE, the removal of v.
constexpr float FEED_RATE { 0.03f };
// DT: factor applied to the computed increments du and dv.
constexpr float DT { 1.0f };
// DIFFUSION_RATE_U / DIFFUSION_RATE_V: weights of the summed neighbour
// differences of u and v respectively.
constexpr float DIFFUSION_RATE_U { 0.1f };
constexpr float DIFFUSION_RATE_V { 0.05f };
/// Performs one update step: uploads the input fields, runs the update kernel
/// and downloads the result.
///
/// Every array holds (nb_rows+2) x (nb_cols+2) floats stored row by row: the
/// nb_rows x nb_cols interior surrounded by a one-cell border. The kernel only
/// updates interior cells; border cells are carried over unchanged from the
/// inputs to the outputs.
///
/// @param q        queue used for all copies and for the kernel
/// @param iu, iv   host input fields (read only)
/// @param ou, ov   host output fields (entirely overwritten)
/// @param iud, ivd work buffers receiving the inputs (the callers in this file
///                 pass malloc_device allocations)
/// @param oud, ovd work buffers receiving the outputs (same remark)
/// @param nb_rows  number of interior rows
/// @param nb_cols  number of interior columns
void submit( queue & q,
             float const * iu, float const * iv,
             float * ou, float * ov,
             float * iud, float * ivd,
             float * oud, float * ovd,
             std::size_t nb_rows, std::size_t nb_cols ) {

  const std::size_t padded_nb_cols { nb_cols+2 };
  const std::size_t nb_bytes { (nb_rows+2)*padded_nb_cols*sizeof(float) };

  // Upload the current state
  q.memcpy(iud,iu,nb_bytes).wait();
  q.memcpy(ivd,iv,nb_bytes).wait();

  // The kernel never writes the border cells of oud/ovd. Seed the output
  // buffers with the inputs so that the border is preserved instead of
  // copying never-initialised memory back to the host.
  q.memcpy(oud,iud,nb_bytes).wait();
  q.memcpy(ovd,ivd,nb_bytes).wait();

  // Submit command group for execution
  q.submit([&](handler& h) {

    // Define the kernel: one work-item per interior cell
    h.parallel_for(range<2>{nb_rows,nb_cols}, [=](item<2> it) {

      id<2> xy = it.get_id();
      const std::size_t row = xy[0] ;
      const std::size_t col = xy[1] ;

      // (row,col) are interior coordinates; +1 skips the border
      const std::size_t center = (row+1)*padded_nb_cols+col+1;
      const float u = iud[center];
      const float v = ivd[center];
      const float uvv = u*v*v;

      // Sum of (neighbour - centre) over the 3x3 neighbourhood
      // (the centre cell itself contributes zero)
      float full_u = 0.0f;
      float full_v = 0.0f;
      for(std::size_t k = 0; k < 3; ++k){
        for(std::size_t l = 0; l < 3; ++l){
          const std::size_t neighbour = (row+k)*padded_nb_cols+col+l;
          full_u += (iud[neighbour] - u);
          full_v += (ivd[neighbour] - v);
        }
      }

      const float du = DIFFUSION_RATE_U*full_u - uvv + FEED_RATE*(1.0f - u);
      const float dv = DIFFUSION_RATE_V*full_v + uvv - (FEED_RATE + KILL_RATE)*v;
      oud[center] = u + du*DT;
      ovd[center] = v + dv*DT;

    });

  }).wait();

  // Download the new state
  q.memcpy(ou,oud,nb_bytes).wait();
  q.memcpy(ov,ovd,nb_bytes).wait();
}
int main( int argc, char * argv[] ) {
// runtime parameters
assert(argc=5) ;
std::size_t nb_rows {std::stoul(argv[1])} ;
std::size_t nb_cols {std::stoul(argv[2])} ;
std::size_t nb_images {std::stoul(argv[3])} ;
std::size_t nb_iterations {std::stoul(argv[4])} ;
assert(nb_iterations % 2 == 0); // nb_iterations must be even
try {
// Loop through available platforms and devices
for (auto const& this_platform : platform::get_platforms() ) {
std::cout << "Found platform: "
<< this_platform.get_info<info::platform::name>() << std::endl;
for (auto const& this_device : this_platform.get_devices() ) {
std::cout << " Device: "
<< this_device.get_info<info::device::name>() << std::endl;
}
}
// Create SYCL queue
queue q;
// Running platform and device
std::cout << "Running on platform: "
<< q.get_device().get_platform().get_info<info::platform::name>() << std::endl;
std::cout << " Device: "
<< q.get_device().get_info<info::device::name>() << std::endl;
std::cout << std::endl;
// Initialize local arrays
const std::size_t padded_nb_rows { nb_rows+2 };
const std::size_t padded_nb_cols { nb_cols+2 };
const std::size_t size { padded_nb_rows*padded_nb_cols };
std::vector<float> u1(size);
std::vector<float> v1(size);
std::vector<float> u2(size);
std::vector<float> v2(size);
for (int i = 0; i < padded_nb_rows; i++) {
for (int j = 0; j < padded_nb_cols; j++) {
u1[i*padded_nb_cols+j] = 1.f;
v1[i*padded_nb_cols+j] = 0.f;
u2[i*padded_nb_cols+j] = 1.f;
v2[i*padded_nb_cols+j] = 0.f;
}
}
const std::size_t v_row_begin { (7ul*padded_nb_rows+8ul)/16ul };
const std::size_t v_row_end { (9ul*padded_nb_rows+8ul)/16ul };
const std::size_t v_col_begin { (7ul*padded_nb_cols+8ul)/16ul };
const std::size_t v_col_end { (9ul*padded_nb_cols+8ul)/16ul };
std::cout << "v_row_begin: " << v_row_begin << std::endl;
std::cout << "v_row_end: " << v_row_end << std::endl;
std::cout << "v_col_begin: " << v_col_begin << std::endl;
std::cout << "v_col_end: " << v_col_end << std::endl;
std::cout << std::endl;
for (int i = v_row_begin; i < v_row_end; i++) {
for (int j = v_col_begin; j < v_col_end; j++) {
u1[i*padded_nb_cols+j] = 0.f;
v1[i*padded_nb_cols+j] = 1.f;
}
}
// Create device arrays
float * iud = malloc_device<float>(size, q);
float * ivd = malloc_device<float>(size, q);
float * oud = malloc_device<float>(size, q);
float * ovd = malloc_device<float>(size, q);
// iterations
for ( std::size_t image = 0 ; image < nb_images ; ++image ) {
for ( std::size_t iter = 0 ; iter < nb_iterations ; iter += 2 ) {
submit( q, u1.data(), v1.data(), u2.data(), v2.data(), iud, ivd, oud, ovd, nb_rows, nb_cols );
submit( q, u2.data(), v2.data(), u1.data(), v1.data(), iud, ivd, oud, ovd, nb_rows, nb_cols );
}
}
// Print some result
const std::size_t row_center { padded_nb_rows/2ul };
const std::size_t col_center { padded_nb_cols/2ul };
std::cout<<std::fixed<<std::setprecision(2) ;
for (std::size_t i = (row_center-5ul) ; i < (row_center+5ul); i++) {
for (std::size_t j = (col_center-5ul); j < (col_center+5ul); j++) {
std::cout << u1[i*padded_nb_cols+j] << " ";
}
std::cout << "\n";
}
std::cout << std::endl;
for (std::size_t i = (row_center-5ul) ; i < (row_center+5ul); i++) {
for (std::size_t j = (col_center-5ul); j < (col_center+5ul); j++) {
std::cout << v1[i*padded_nb_cols+j] << " ";
}
std::cout << "\n";
}
std::cout << std::endl;
// Release device arrays
sycl::free(iud,q);
sycl::free(ivd,q);
sycl::free(oud,q);
sycl::free(ovd,q);
}
catch (sycl::exception & e) {
std::cout << e.what() << std::endl;
std::cout << e.code().message() << std::endl;
}
catch (std::exception & e) {
std::cout << e.what() << std::endl;
}
catch (const char * e) {
std::cout << e << std::endl;
}
return 0;
}
# Top-level CMake configuration for the SyclSquare project.
# Direct CMake to use icpx rather than the default C++ compiler/linker
# (set before project() so that it is taken into account when the language is enabled).
set(CMAKE_CXX_COMPILER icpx)
cmake_minimum_required (VERSION 3.4)
project(SyclSquare CXX)
# Place archives, libraries and executables directly in the top of the build tree.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
# Process the CMakeLists.txt found in src/ (presumably where the targets are declared).
add_subdirectory (src)
#!/bin/bash
# Build all targets inside the build/ directory.
# Stop immediately if build/ does not exist, so that make is never run
# from the wrong directory.
cd build || exit 1
make all
#!/bin/bash
# Recreate an empty build/ directory and run the CMake configuration in it.
rm -rf build
mkdir -p build
# Stop if build/ could not be entered: "cmake .." would otherwise be run
# against the parent of the current directory.
cd build || exit 1
cmake ..
#!/bin/bash
# Time one run of the program.
# Arguments of main.exe, in order: nb_rows nb_cols nb_images nb_iterations.
time ./build/main.exe 270 480 5 10000
# Alternative grid sizes, disabled by default.
#time ./build/main.exe 540 960 5 10000
#time ./build/main.exe 1080 1920 5 10000
# NOTE(review): the next line names sycl-gray-scott.exe while the other lines
# use main.exe; confirm the executable name before enabling it.
#time ./build/sycl-gray-scott.exe 2160 3840 5 1000
# Declares the single executable target, built from one source file.
set(SOURCE_FILE main.cpp)
set(TARGET_NAME main.exe)
# -fsycl is passed both when compiling and when linking.
set(COMPILE_FLAGS "-fsycl -Wall")
set(LINK_FLAGS "-fsycl")
add_executable(${TARGET_NAME} ${SOURCE_FILE})
set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS}")
set_target_properties(${TARGET_NAME} PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
#add_custom_target(all DEPENDS ${TARGET_NAME})
#include <CL/sycl.hpp>
#include <array>
#include <iostream>
#include <iomanip>
#include <cmath>
using namespace cl::sycl;
// Parameters of the update computed by the kernel in submit().
// KILL_RATE: together with FEED_RATE, scales the removal term of v.
constexpr float KILL_RATE { 0.062f };
// FEED_RATE: scales the (1 - u) source term of u and, with KILL_RATE, the removal of v.
constexpr float FEED_RATE { 0.03f };
// DT: factor applied to the computed increments du and dv.
constexpr float DT { 1.0f };
// DIFFUSION_RATE_U / DIFFUSION_RATE_V: weights of the summed neighbour
// differences of u and v respectively.
constexpr float DIFFUSION_RATE_U { 0.1f };
constexpr float DIFFUSION_RATE_V { 0.05f };
/// Performs one update step: uploads the input fields, runs the update kernel
/// and downloads the result.
///
/// Every array holds (nb_rows+2) x (nb_cols+2) floats stored row by row: the
/// nb_rows x nb_cols interior surrounded by a one-cell border. The kernel only
/// updates interior cells; border cells are carried over unchanged from the
/// inputs to the outputs.
///
/// @param q        queue used for all copies and for the kernel
/// @param iu, iv   host input fields (read only)
/// @param ou, ov   host output fields (entirely overwritten)
/// @param iud, ivd work buffers receiving the inputs (the callers in this file
///                 pass malloc_device allocations)
/// @param oud, ovd work buffers receiving the outputs (same remark)
/// @param nb_rows  number of interior rows
/// @param nb_cols  number of interior columns
void submit( queue & q,
             float const * iu, float const * iv,
             float * ou, float * ov,
             float * iud, float * ivd,
             float * oud, float * ovd,
             std::size_t nb_rows, std::size_t nb_cols ) {

  const std::size_t padded_nb_cols { nb_cols+2 };
  const std::size_t nb_bytes { (nb_rows+2)*padded_nb_cols*sizeof(float) };

  // Upload the current state
  q.memcpy(iud,iu,nb_bytes).wait();
  q.memcpy(ivd,iv,nb_bytes).wait();

  // The kernel never writes the border cells of oud/ovd. Seed the output
  // buffers with the inputs so that the border is preserved instead of
  // copying never-initialised memory back to the host.
  q.memcpy(oud,iud,nb_bytes).wait();
  q.memcpy(ovd,ivd,nb_bytes).wait();

  // Submit command group for execution
  q.submit([&](handler& h) {

    // Define the kernel: one work-item per interior cell
    h.parallel_for(range<2>{nb_rows,nb_cols}, [=](item<2> it) {

      id<2> xy = it.get_id();
      const std::size_t row = xy[0] ;
      const std::size_t col = xy[1] ;

      // (row,col) are interior coordinates; +1 skips the border
      const std::size_t center = (row+1)*padded_nb_cols+col+1;
      const float u = iud[center];
      const float v = ivd[center];
      const float uvv = u*v*v;

      // Sum of (neighbour - centre) over the 3x3 neighbourhood
      // (the centre cell itself contributes zero)
      float full_u = 0.0f;
      float full_v = 0.0f;
      for(std::size_t k = 0; k < 3; ++k){
        for(std::size_t l = 0; l < 3; ++l){
          const std::size_t neighbour = (row+k)*padded_nb_cols+col+l;
          full_u += (iud[neighbour] - u);
          full_v += (ivd[neighbour] - v);
        }
      }

      const float du = DIFFUSION_RATE_U*full_u - uvv + FEED_RATE*(1.0f - u);
      const float dv = DIFFUSION_RATE_V*full_v + uvv - (FEED_RATE + KILL_RATE)*v;
      oud[center] = u + du*DT;
      ovd[center] = v + dv*DT;

    });

  }).wait();

  // Download the new state
  q.memcpy(ou,oud,nb_bytes).wait();
  q.memcpy(ov,ovd,nb_bytes).wait();
}
int main( int argc, char * argv[] ) {
// runtime parameters
assert(argc=5) ;
std::size_t nb_rows {std::stoul(argv[1])} ;
std::size_t nb_cols {std::stoul(argv[2])} ;
std::size_t nb_images {std::stoul(argv[3])} ;
std::size_t nb_iterations {std::stoul(argv[4])} ;
assert(nb_iterations % 2 == 0); // nb_iterations must be even
try {
// Loop through available platforms and devices
for (auto const& this_platform : platform::get_platforms() ) {
std::cout << "Found platform: "
<< this_platform.get_info<info::platform::name>() << std::endl;
for (auto const& this_device : this_platform.get_devices() ) {
std::cout << " Device: "
<< this_device.get_info<info::device::name>() << std::endl;
}
}
// Create SYCL queue
queue q;
// Running platform and device
std::cout << "Running on platform: "
<< q.get_device().get_platform().get_info<info::platform::name>() << std::endl;
std::cout << " Device: "
<< q.get_device().get_info<info::device::name>() << std::endl;
std::cout << std::endl;
// Initialize local arrays
const std::size_t padded_nb_rows { nb_rows+2 };
const std::size_t padded_nb_cols { nb_cols+2 };
const std::size_t size { padded_nb_rows*padded_nb_cols };
std::vector<float> u1(size);
std::vector<float> v1(size);
std::vector<float> u2(size);
std::vector<float> v2(size);
for (int i = 0; i < padded_nb_rows; i++) {
for (int j = 0; j < padded_nb_cols; j++) {
u1[i*padded_nb_cols+j] = 1.f;
v1[i*padded_nb_cols+j] = 0.f;
u2[i*padded_nb_cols+j] = 1.f;
v2[i*padded_nb_cols+j] = 0.f;
}
}
const std::size_t v_row_begin { (7ul*padded_nb_rows+8ul)/16ul };
const std::size_t v_row_end { (9ul*padded_nb_rows+8ul)/16ul };
const std::size_t v_col_begin { (7ul*padded_nb_cols+8ul)/16ul };
const std::size_t v_col_end { (9ul*padded_nb_cols+8ul)/16ul };
std::cout << "v_row_begin: " << v_row_begin << std::endl;
std::cout << "v_row_end: " << v_row_end << std::endl;
std::cout << "v_col_begin: " << v_col_begin << std::endl;
std::cout << "v_col_end: " << v_col_end << std::endl;
std::cout << std::endl;
for (int i = v_row_begin; i < v_row_end; i++) {
for (int j = v_col_begin; j < v_col_end; j++) {
u1[i*padded_nb_cols+j] = 0.f;
v1[i*padded_nb_cols+j] = 1.f;
}
}
// Create device arrays
float * iud = malloc_device<float>(size, q);
float * ivd = malloc_device<float>(size, q);
float * oud = malloc_device<float>(size, q);
float * ovd = malloc_device<float>(size, q);
// iterations
for ( std::size_t image = 0 ; image < nb_images ; ++image ) {
for ( std::size_t iter = 0 ; iter < nb_iterations ; iter += 2 ) {
submit( q, u1.data(), v1.data(), u2.data(), v2.data(), iud, ivd, oud, ovd, nb_rows, nb_cols );
submit( q, u2.data(), v2.data(), u1.data(), v1.data(), iud, ivd, oud, ovd, nb_rows, nb_cols );
}
}
// Print some result
const std::size_t row_center { padded_nb_rows/2ul };
const std::size_t col_center { padded_nb_cols/2ul };
std::cout<<std::fixed<<std::setprecision(2) ;
for (std::size_t i = (row_center-5ul) ; i < (row_center+5ul); i++) {
for (std::size_t j = (col_center-5ul); j < (col_center+5ul); j++) {
std::cout << u1[i*padded_nb_cols+j] << " ";
}
std::cout << "\n";
}
std::cout << std::endl;
for (std::size_t i = (row_center-5ul) ; i < (row_center+5ul); i++) {
for (std::size_t j = (col_center-5ul); j < (col_center+5ul); j++) {
std::cout << v1[i*padded_nb_cols+j] << " ";
}
std::cout << "\n";
}
std::cout << std::endl;
// Release device arrays
sycl::free(iud,q);
sycl::free(ivd,q);
sycl::free(oud,q);
sycl::free(ovd,q);
}
catch (sycl::exception & e) {
std::cout << e.what() << std::endl;
std::cout << e.code().message() << std::endl;
}
catch (std::exception & e) {
std::cout << e.what() << std::endl;
}
catch (const char * e) {
std::cout << e << std::endl;
}
return 0;
}
# Top-level CMake configuration for the SyclSquare project.
# Direct CMake to use icpx rather than the default C++ compiler/linker
# (set before project() so that it is taken into account when the language is enabled).
set(CMAKE_CXX_COMPILER icpx)
cmake_minimum_required (VERSION 3.4)
project(SyclSquare CXX)
# Place archives, libraries and executables directly in the top of the build tree.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
# Process the CMakeLists.txt found in src/ (presumably where the targets are declared).
add_subdirectory (src)
#!/bin/bash
# Build all targets inside the build/ directory.
# Stop immediately if build/ does not exist, so that make is never run
# from the wrong directory.
cd build || exit 1
make all
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment