Skip to content
Snippets Groups Projects
Commit 49fc30d0 authored by CHAMONT David's avatar CHAMONT David
Browse files

Ajout d'une variante regroupant explicitement les iterations en inter au GPU.

parent f036c59e
No related branches found
No related tags found
No related merge requests found
#!/bin/bash
# Recreate a clean out-of-source build directory and run the CMake
# configure step inside it. This script only configures the project;
# it does not invoke the build itself.

# Stop at the first failing command, so that `cmake ..` can never run
# from the wrong directory if `mkdir` or `cd` fails.
set -e

rm -rf build
mkdir -p build
cd build
cmake ..
#!/bin/bash
# Time one run of the simulation executable.
# Positional arguments: nb_rows nb_cols nb_images nb_iterations
exe=./build/main.exe

time "${exe}" 270 480 5 10000
# Larger problem sizes, disabled by default:
#time ./build/main.exe 540 960 5 10000
#time ./build/main.exe 1080 1920 5 10000
#time ./build/sycl-gray-scott.exe 2160 3840 5 1000
# Single-source SYCL executable.
set(SOURCE_FILE main.cpp)
set(TARGET_NAME main.exe)

# -fsycl is passed to both the compile and the link step; -Wall only to the compile step.
set(COMPILE_FLAGS "-fsycl -Wall")
set(LINK_FLAGS "-fsycl")

add_executable(${TARGET_NAME} ${SOURCE_FILE})

# Attach both flag strings to the target in a single call.
set_target_properties(${TARGET_NAME} PROPERTIES
  COMPILE_FLAGS "${COMPILE_FLAGS}"
  LINK_FLAGS "${LINK_FLAGS}")
#add_custom_target(all DEPENDS ${TARGET_NAME})
#include <CL/sycl.hpp>
#include <array>
#include <iostream>
#include <iomanip>
#include <cmath>
using namespace cl::sycl;
// Model parameters used by the update kernel in submit().

// Added to FEED_RATE to form the linear removal coefficient applied to v.
constexpr float KILL_RATE = 0.062f;
// Coefficient of the (1 - u) source term for u; also part of the removal coefficient for v.
constexpr float FEED_RATE = 0.03f;
// Time step multiplying the computed increments du and dv.
constexpr float DT = 1.0f;
// Weights applied to the 3x3 neighbourhood difference sums of u and v respectively.
constexpr float DIFFUSION_RATE_U = 0.1f;
constexpr float DIFFUSION_RATE_V = 0.05f;
/// Performs one time step of the reaction-diffusion update on the device and
/// blocks until it has completed.
///
/// All four arrays are laid out row-major with (nb_rows+2) x (nb_cols+2)
/// elements, i.e. the nb_rows x nb_cols interior plus a one-cell border on
/// every side. Only interior cells of the output arrays are written; the
/// border cells are read (as neighbours) but never modified.
///
/// @param q        queue the kernel is submitted to
/// @param iu, iv   input fields u and v (read only)
/// @param ou, ov   output fields u and v (interior cells overwritten)
/// @param nb_rows  number of interior rows
/// @param nb_cols  number of interior columns
void submit( queue & q,
             float const * iu, float const * iv,
             float * ou, float * ov,
             std::size_t nb_rows, std::size_t nb_cols ) {
  // Number of elements per row of the padded arrays.
  const std::size_t stride = nb_cols + 2;

  q.submit([&](handler & cgh) {
    // One work-item per interior cell.
    cgh.parallel_for(range<2>{nb_rows, nb_cols}, [=](item<2> cell) {
      const std::size_t r = cell.get_id(0);
      const std::size_t c = cell.get_id(1);

      // Interior cell (r, c) lives at padded position (r+1, c+1).
      const std::size_t center = (r + 1) * stride + c + 1;
      const float u = iu[center];
      const float v = iv[center];
      const float uvv = u * v * v;

      // Sum of (neighbour - centre) over the 3x3 window whose top-left
      // padded position is (r, c); the centre cell itself contributes zero.
      float sum_u = 0.0f;
      float sum_v = 0.0f;
      for (std::size_t dr = 0; dr < 3; ++dr) {
        const std::size_t row_start = (r + dr) * stride + c;
        for (std::size_t dc = 0; dc < 3; ++dc) {
          sum_u += (iu[row_start + dc] - u);
          sum_v += (iv[row_start + dc] - v);
        }
      }

      const float du = DIFFUSION_RATE_U*sum_u - uvv + FEED_RATE*(1.0f - u);
      const float dv = DIFFUSION_RATE_V*sum_v + uvv - (FEED_RATE + KILL_RATE)*v;

      ou[center] = u + du*DT;
      ov[center] = v + dv*DT;
    });
  });

  // Block until the step has finished so the caller may reuse the buffers.
  q.wait();
}
int main( int argc, char * argv[] ) {
// runtime parameters
assert(argc=5) ;
std::size_t nb_rows {std::stoul(argv[1])} ;
std::size_t nb_cols {std::stoul(argv[2])} ;
std::size_t nb_images {std::stoul(argv[3])} ;
std::size_t nb_iterations {std::stoul(argv[4])} ;
assert(nb_iterations % 2 == 0); // nb_iterations must be even
try {
// Loop through available platforms and devices
for (auto const& this_platform : platform::get_platforms() ) {
std::cout << "Found platform: "
<< this_platform.get_info<info::platform::name>() << std::endl;
for (auto const& this_device : this_platform.get_devices() ) {
std::cout << " Device: "
<< this_device.get_info<info::device::name>() << std::endl;
}
}
// Create SYCL queue
queue q;
// Running platform and device
std::cout << "Running on platform: "
<< q.get_device().get_platform().get_info<info::platform::name>() << std::endl;
std::cout << " Device: "
<< q.get_device().get_info<info::device::name>() << std::endl;
std::cout << std::endl;
// Initialize input array
const std::size_t padded_nb_rows { nb_rows+2 };
const std::size_t padded_nb_cols { nb_cols+2 };
const std::size_t size { padded_nb_rows*padded_nb_cols };
float * u1 = malloc_shared<float>(size, q);
float * u2 = malloc_shared<float>(size, q);
float * v1 = malloc_shared<float>(size, q);
float * v2 = malloc_shared<float>(size, q);
for (int i = 0; i < padded_nb_rows; i++) {
for (int j = 0; j < padded_nb_cols; j++) {
u1[i*padded_nb_cols+j] = 1.f;
v1[i*padded_nb_cols+j] = 0.f;
u2[i*padded_nb_cols+j] = 1.f;
v2[i*padded_nb_cols+j] = 0.f;
}
}
const std::size_t v_row_begin { (7ul*padded_nb_rows+8ul)/16ul };
const std::size_t v_row_end { (9ul*padded_nb_rows+8ul)/16ul };
const std::size_t v_col_begin { (7ul*padded_nb_cols+8ul)/16ul };
const std::size_t v_col_end { (9ul*padded_nb_cols+8ul)/16ul };
std::cout << "v_row_begin: " << v_row_begin << std::endl;
std::cout << "v_row_end: " << v_row_end << std::endl;
std::cout << "v_col_begin: " << v_col_begin << std::endl;
std::cout << "v_col_end: " << v_col_end << std::endl;
std::cout << std::endl;
for (int i = v_row_begin; i < v_row_end; i++) {
for (int j = v_col_begin; j < v_col_end; j++) {
u1[i*padded_nb_cols+j] = 0.f;
v1[i*padded_nb_cols+j] = 1.f;
}
}
// iterations
for ( std::size_t image = 0 ; image < nb_images ; ++image ) {
for ( std::size_t iter = 0 ; iter < nb_iterations ; iter += 2 ) {
submit( q, u1, v1, u2, v2, nb_rows, nb_cols );
submit( q, u2, v2, u1, v1, nb_rows, nb_cols );
}
}
// Print some result
const std::size_t row_center { padded_nb_rows/2ul };
const std::size_t col_center { padded_nb_cols/2ul };
std::cout<<std::fixed<<std::setprecision(2) ;
for (std::size_t i = (row_center-5ul) ; i < (row_center+5ul); i++) {
for (std::size_t j = (col_center-5ul); j < (col_center+5ul); j++) {
std::cout << u1[i*padded_nb_cols+j] << " ";
}
std::cout << "\n";
}
std::cout << std::endl;
std::cout<<std::fixed<<std::setprecision(2) ;
for (std::size_t i = (row_center-5ul) ; i < (row_center+5ul); i++) {
for (std::size_t j = (col_center-5ul); j < (col_center+5ul); j++) {
std::cout << v1[i*padded_nb_cols+j] << " ";
}
std::cout << "\n";
}
std::cout << std::endl;
}
catch (sycl::exception & e) {
std::cout << e.what() << std::endl;
std::cout << e.code().message() << std::endl;
}
catch (std::exception & e) {
std::cout << e.what() << std::endl;
}
catch (const char * e) {
std::cout << e << std::endl;
}
return 0;
}
...@@ -24,21 +24,22 @@ int main() { ...@@ -24,21 +24,22 @@ int main() {
<< q.get_device().get_info<info::device::name>() << "\n"; << q.get_device().get_info<info::device::name>() << "\n";
std::cout << "\n"; std::cout << "\n";
std::array<float,SIZE> input, output ;
float * binput = sycl::malloc_device<float>(SIZE, q);
float * boutput = sycl::malloc_device<float>(SIZE, q);
// Initialize input array // Initialize input array
std::array<float,SIZE> input, output ;
for (std::size_t i = 0; i < SIZE; i++) { for (std::size_t i = 0; i < SIZE; i++) {
input[i] = i + 1; input[i] = i + 1;
} }
// Alloc memory on device
float * dinput = malloc_device<float>(SIZE, q);
float * doutput = malloc_device<float>(SIZE, q);
// Submit command group for execution // Submit command group for execution
q.memcpy(binput,input.data(),SIZE*sizeof(float)).wait(); q.memcpy(dinput,input.data(),SIZE*sizeof(float)).wait();
q.parallel_for(SIZE, [=](id<1> idx) { q.parallel_for(SIZE, [=](id<1> idx) {
boutput[idx] = binput[idx] * binput[idx]; houtput[idx] = hinput[idx] * hinput[idx];
}).wait(); }).wait();
q.memcpy(output.data(),boutput,SIZE*sizeof(float)).wait(); q.memcpy(output.data(),doutput,SIZE*sizeof(float)).wait();
// Print the result // Print the result
for (int i = 0; i < SIZE; i++) { for (int i = 0; i < SIZE; i++) {
...@@ -47,8 +48,8 @@ int main() { ...@@ -47,8 +48,8 @@ int main() {
std::cout << std::endl; std::cout << std::endl;
// Release resources // Release resources
sycl::free(binput, q); sycl::free(dinput, q);
sycl::free(boutput, q); sycl::free(doutput, q);
return 0; return 0;
} }
...@@ -24,8 +24,8 @@ int main() { ...@@ -24,8 +24,8 @@ int main() {
<< q.get_device().get_info<info::device::name>() << "\n"; << q.get_device().get_info<info::device::name>() << "\n";
std::cout << "\n"; std::cout << "\n";
auto * input = sycl::malloc_shared<float>(SIZE, q); auto * input = malloc_shared<float>(SIZE, q);
auto * output = sycl::malloc_shared<float>(SIZE, q); auto * output = malloc_shared<float>(SIZE, q);
// Initialize input array // Initialize input array
for (std::size_t i = 0; i < SIZE; i++) { for (std::size_t i = 0; i < SIZE; i++) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment