From 49fc30d03b4215986beffca7ac85f2c2344dbc0a Mon Sep 17 00:00:00 2001
From: David Chamont <chamont@in2p3.fr>
Date: Thu, 6 Jul 2023 12:37:55 -0700
Subject: [PATCH] Ajout d'une variante regroupant explicitement les iterations en inter au GPU.

---
 .../CMakeLists.txt                           |   0
 .../build.bash                               |   0
 .../cmake.bash                               |   0
 {SyclGrayScott => GrayScottBuffers}/run.bash |   0
 .../src/CMakeLists.txt                       |   4 +-
 .../src/main.cpp                             |   3 +-
 GrayScottDevice/CMakeLists.txt               |  12 ++
 GrayScottDevice/build.bash                   |   3 +
 GrayScottDevice/cmake.bash                   |   5 +
 GrayScottDevice/run.bash                     |   5 +
 GrayScottDevice/src/CMakeLists.txt           |  10 +
 GrayScottDevice/src/main.cpp                 | 177 ++++++++++++++++++
 GrayScottIterations/CMakeLists.txt           |  12 ++
 GrayScottIterations/build.bash               |   3 +
 GrayScottIterations/cmake.bash               |   5 +
 GrayScottIterations/run.bash                 |   5 +
 GrayScottIterations/src/CMakeLists.txt       |  10 +
 GrayScottIterations/src/main.cpp             | 177 ++++++++++++++++++
 GrayScottShared/CMakeLists.txt               |  12 ++
 GrayScottShared/build.bash                   |   3 +
 GrayScottShared/cmake.bash                   |   5 +
 GrayScottShared/run.bash                     |   5 +
 GrayScottShared/src/CMakeLists.txt           |  10 +
 GrayScottShared/src/main.cpp                 | 163 ++++++++++++++++
 SquareDevice/src/main.cpp                    |  19 +-
 SquareShared/src/main.cpp                    |   4 +-
 26 files changed, 637 insertions(+), 15 deletions(-)
 rename {SyclGrayScott => GrayScottBuffers}/CMakeLists.txt (100%)
 rename {SyclGrayScott => GrayScottBuffers}/build.bash (100%)
 rename {SyclGrayScott => GrayScottBuffers}/cmake.bash (100%)
 rename {SyclGrayScott => GrayScottBuffers}/run.bash (100%)
 rename {SyclGrayScott => GrayScottBuffers}/src/CMakeLists.txt (80%)
 rename SyclGrayScott/src/sycl-gray-scott.cpp => GrayScottBuffers/src/main.cpp (98%)
 create mode 100755 GrayScottDevice/CMakeLists.txt
 create mode 100755 GrayScottDevice/build.bash
 create mode 100755 GrayScottDevice/cmake.bash
 create mode 100755 GrayScottDevice/run.bash
 create mode 100755 GrayScottDevice/src/CMakeLists.txt
 create mode 100644 GrayScottDevice/src/main.cpp
 create mode 100755 GrayScottIterations/CMakeLists.txt
 create mode 100755 GrayScottIterations/build.bash
 create mode 100755 GrayScottIterations/cmake.bash
 create mode 100755 GrayScottIterations/run.bash
 create mode 100755 GrayScottIterations/src/CMakeLists.txt
 create mode 100644 GrayScottIterations/src/main.cpp
 create mode 100755 GrayScottShared/CMakeLists.txt
 create mode 100755 GrayScottShared/build.bash
 create mode 100755 GrayScottShared/cmake.bash
 create mode 100755 GrayScottShared/run.bash
 create mode 100755 GrayScottShared/src/CMakeLists.txt
 create mode 100644 GrayScottShared/src/main.cpp

diff --git a/SyclGrayScott/CMakeLists.txt b/GrayScottBuffers/CMakeLists.txt
similarity index 100%
rename from SyclGrayScott/CMakeLists.txt
rename to GrayScottBuffers/CMakeLists.txt
diff --git a/SyclGrayScott/build.bash b/GrayScottBuffers/build.bash
similarity index 100%
rename from SyclGrayScott/build.bash
rename to GrayScottBuffers/build.bash
diff --git a/SyclGrayScott/cmake.bash b/GrayScottBuffers/cmake.bash
similarity index 100%
rename from SyclGrayScott/cmake.bash
rename to GrayScottBuffers/cmake.bash
diff --git a/SyclGrayScott/run.bash b/GrayScottBuffers/run.bash
similarity index 100%
rename from SyclGrayScott/run.bash
rename to GrayScottBuffers/run.bash
diff --git a/SyclGrayScott/src/CMakeLists.txt b/GrayScottBuffers/src/CMakeLists.txt
similarity index 80%
rename from SyclGrayScott/src/CMakeLists.txt
rename to GrayScottBuffers/src/CMakeLists.txt
index 1f5496e..fe04241 100755
--- a/SyclGrayScott/src/CMakeLists.txt
+++ b/GrayScottBuffers/src/CMakeLists.txt
@@ -1,5 +1,5 @@
-set(SOURCE_FILE sycl-gray-scott.cpp)
-set(TARGET_NAME sycl-gray-scott.exe)
+set(SOURCE_FILE main.cpp)
+set(TARGET_NAME main.exe)
 
 set(COMPILE_FLAGS "-fsycl -Wall")
 set(LINK_FLAGS "-fsycl")
diff --git a/SyclGrayScott/src/sycl-gray-scott.cpp b/GrayScottBuffers/src/main.cpp
similarity index 98%
rename from SyclGrayScott/src/sycl-gray-scott.cpp
rename to GrayScottBuffers/src/main.cpp
index 99f8bc3..f88fd74 100644
--- a/SyclGrayScott/src/sycl-gray-scott.cpp
+++ b/GrayScottBuffers/src/main.cpp
@@ -169,8 +169,7 @@ int main( int argc, char * argv[] ) {
   }
   catch (sycl::exception & e) {
     std::cout << e.what() << std::endl;
-    std::cout << e.category() << std::endl;
-    std::cout << e.code() << std::endl;
+    std::cout << e.code().message() << std::endl;
   }
   catch (std::exception & e) {
     std::cout << e.what() << std::endl;
diff --git a/GrayScottDevice/CMakeLists.txt b/GrayScottDevice/CMakeLists.txt
new file mode 100755
index 0000000..36103fa
--- /dev/null
+++ b/GrayScottDevice/CMakeLists.txt
@@ -0,0 +1,12 @@
+cmake_minimum_required (VERSION 3.4)
+
+# Direct CMake to use icpx rather than the default C++ compiler/linker
+set(CMAKE_CXX_COMPILER icpx)
+
+project(GrayScottDevice CXX)
+
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
+
+add_subdirectory (src)
diff --git a/GrayScottDevice/build.bash b/GrayScottDevice/build.bash
new file mode 100755
index 0000000..49344dd
--- /dev/null
+++ b/GrayScottDevice/build.bash
@@ -0,0 +1,3 @@
+#!/bin/bash
+cd build || exit 1
+make all
diff --git a/GrayScottDevice/cmake.bash b/GrayScottDevice/cmake.bash
new file mode 100755
index 0000000..869411d
--- /dev/null
+++ b/GrayScottDevice/cmake.bash
@@ -0,0 +1,5 @@
+#!/bin/bash
+rm -rf build
+mkdir -p build
+cd build || exit 1
+cmake ..
diff --git a/GrayScottDevice/run.bash b/GrayScottDevice/run.bash
new file mode 100755
index 0000000..26a9b8e
--- /dev/null
+++ b/GrayScottDevice/run.bash
@@ -0,0 +1,5 @@
+#!/bin/bash
+time ./build/main.exe 270 480 5 10000
+#time ./build/main.exe 540 960 5 10000
+#time ./build/main.exe 1080 1920 5 10000
+#time ./build/main.exe 2160 3840 5 1000
diff --git a/GrayScottDevice/src/CMakeLists.txt b/GrayScottDevice/src/CMakeLists.txt
new file mode 100755
index 0000000..fe04241
--- /dev/null
+++ b/GrayScottDevice/src/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(SOURCE_FILE main.cpp)
+set(TARGET_NAME main.exe)
+
+set(COMPILE_FLAGS "-fsycl -Wall")
+set(LINK_FLAGS "-fsycl")
+
+add_executable(${TARGET_NAME} ${SOURCE_FILE})
+set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS}")
+set_target_properties(${TARGET_NAME} PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
+#add_custom_target(all DEPENDS ${TARGET_NAME})
diff --git a/GrayScottDevice/src/main.cpp b/GrayScottDevice/src/main.cpp
new file mode 100644
index 0000000..acab7db
--- /dev/null
+++ b/GrayScottDevice/src/main.cpp
@@ -0,0 +1,204 @@
+#include <CL/sycl.hpp>
+#include <array>
+#include <iostream>
+#include <iomanip>
+#include <cmath>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+using namespace cl::sycl;
+
+constexpr float KILL_RATE { 0.062f };
+constexpr float FEED_RATE { 0.03f };
+constexpr float DT { 1.0f };
+
+constexpr float DIFFUSION_RATE_U { 0.1f };
+constexpr float DIFFUSION_RATE_V { 0.05f };
+
+// Run one Gray-Scott time step: copy the host inputs iu/iv to the device
+// arrays iud/ivd, update the interior cells into oud/ovd, then copy oud/ovd
+// back to the host outputs ou/ov. Every array holds (nb_rows+2)*(nb_cols+2)
+// floats. The kernel never writes the one-cell border of oud/ovd, so the
+// caller must have initialized it before the first call.
+void submit( queue & q,
+             float const * iu, float const * iv,
+             float * ou, float * ov,
+             float * iud, float * ivd,
+             float * oud, float * ovd,
+             std::size_t nb_rows, std::size_t nb_cols ) {
+
+  const std::size_t nb_bytes { (nb_rows+2)*(nb_cols+2)*sizeof(float) };
+
+  // Copy the inputs to the device
+  q.memcpy(iud,iu,nb_bytes).wait();
+  q.memcpy(ivd,iv,nb_bytes).wait();
+
+  // Submit command group for execution
+  q.submit([&](handler& h) {
+
+    // Define the kernel
+    h.parallel_for(range<2>{nb_rows,nb_cols}, [=](item<2> it) {
+
+      id<2> xy = it.get_id();
+      std::size_t row = xy[0] ;
+      std::size_t col = xy[1] ;
+
+      float u = iud[(row+1)*(nb_cols+2)+col+1];
+      float v = ivd[(row+1)*(nb_cols+2)+col+1];
+      float uvv = u*v*v;
+
+      // Sum of (neighbour - centre) over the 3x3 stencil
+      float full_u = 0.0f;
+      float full_v = 0.0f;
+      for(std::size_t k = 0; k < 3; ++k){
+        for(std::size_t l = 0; l < 3; ++l){
+          full_u += (iud[(row+k)*(nb_cols+2)+col+l] - u);
+          full_v += (ivd[(row+k)*(nb_cols+2)+col+l] - v);
+        }
+      }
+
+      float du = DIFFUSION_RATE_U*full_u - uvv + FEED_RATE*(1.0f - u);
+      float dv = DIFFUSION_RATE_V*full_v + uvv - (FEED_RATE + KILL_RATE)*v;
+
+      oud[(row+1)*(nb_cols+2)+col+1] = u + du*DT;
+      ovd[(row+1)*(nb_cols+2)+col+1] = v + dv*DT;
+    });
+
+  }).wait();
+
+  // Copy the results back to the host
+  q.memcpy(ou,oud,nb_bytes).wait();
+  q.memcpy(ov,ovd,nb_bytes).wait();
+}
+
+int main( int argc, char * argv[] ) {
+
+  // runtime parameters
+  if (argc != 5) {
+    std::cerr << "Usage: " << argv[0] << " nb_rows nb_cols nb_images nb_iterations" << std::endl;
+    return 1;
+  }
+
+  try {
+
+    std::size_t nb_rows {std::stoul(argv[1])} ;
+    std::size_t nb_cols {std::stoul(argv[2])} ;
+    std::size_t nb_images {std::stoul(argv[3])} ;
+    std::size_t nb_iterations {std::stoul(argv[4])} ;
+    if (nb_iterations % 2 != 0) {
+      throw std::invalid_argument("nb_iterations must be even");
+    }
+
+    // Loop through available platforms and devices
+    for (auto const& this_platform : platform::get_platforms() ) {
+      std::cout << "Found platform: "
+                << this_platform.get_info<info::platform::name>() << std::endl;
+      for (auto const& this_device : this_platform.get_devices() ) {
+        std::cout << " Device: "
+                  << this_device.get_info<info::device::name>() << std::endl;
+      }
+    }
+
+    // Create SYCL queue
+    queue q;
+
+    // Running platform and device
+    std::cout << "Running on platform: "
+              << q.get_device().get_platform().get_info<info::platform::name>() << std::endl;
+    std::cout << " Device: "
+              << q.get_device().get_info<info::device::name>() << std::endl;
+    std::cout << std::endl;
+
+    // Initialize local arrays
+    const std::size_t padded_nb_rows { nb_rows+2 };
+    const std::size_t padded_nb_cols { nb_cols+2 };
+    const std::size_t size { padded_nb_rows*padded_nb_cols };
+    std::vector<float> u1(size);
+    std::vector<float> v1(size);
+    std::vector<float> u2(size);
+    std::vector<float> v2(size);
+    for (std::size_t i = 0; i < padded_nb_rows; i++) {
+      for (std::size_t j = 0; j < padded_nb_cols; j++) {
+        u1[i*padded_nb_cols+j] = 1.f;
+        v1[i*padded_nb_cols+j] = 0.f;
+        u2[i*padded_nb_cols+j] = 1.f;
+        v2[i*padded_nb_cols+j] = 0.f;
+      }
+    }
+    const std::size_t v_row_begin { (7ul*padded_nb_rows+8ul)/16ul };
+    const std::size_t v_row_end { (9ul*padded_nb_rows+8ul)/16ul };
+    const std::size_t v_col_begin { (7ul*padded_nb_cols+8ul)/16ul };
+    const std::size_t v_col_end { (9ul*padded_nb_cols+8ul)/16ul };
+    std::cout << "v_row_begin: " << v_row_begin << std::endl;
+    std::cout << "v_row_end: " << v_row_end << std::endl;
+    std::cout << "v_col_begin: " << v_col_begin << std::endl;
+    std::cout << "v_col_end: " << v_col_end << std::endl;
+    std::cout << std::endl;
+    for (std::size_t i = v_row_begin; i < v_row_end; i++) {
+      for (std::size_t j = v_col_begin; j < v_col_end; j++) {
+        u1[i*padded_nb_cols+j] = 0.f;
+        v1[i*padded_nb_cols+j] = 1.f;
+      }
+    }
+
+    // Create device arrays
+    float * iud = malloc_device<float>(size, q);
+    float * ivd = malloc_device<float>(size, q);
+    float * oud = malloc_device<float>(size, q);
+    float * ovd = malloc_device<float>(size, q);
+
+    // The kernel never writes the border of oud/ovd: set it once from the host
+    q.memcpy(oud,u2.data(),size*sizeof(float)).wait();
+    q.memcpy(ovd,v2.data(),size*sizeof(float)).wait();
+
+    // iterations
+    for ( std::size_t image = 0 ; image < nb_images ; ++image ) {
+      for ( std::size_t iter = 0 ; iter < nb_iterations ; iter += 2 ) {
+        submit( q, u1.data(), v1.data(), u2.data(), v2.data(), iud, ivd, oud, ovd, nb_rows, nb_cols );
+        submit( q, u2.data(), v2.data(), u1.data(), v1.data(), iud, ivd, oud, ovd, nb_rows, nb_cols );
+      }
+    }
+
+    // Print some result
+    const std::size_t row_center { padded_nb_rows/2ul };
+    const std::size_t col_center { padded_nb_cols/2ul };
+    std::cout<<std::fixed<<std::setprecision(2) ;
+    for (std::size_t i = (row_center-5ul) ; i < (row_center+5ul); i++) {
+      for (std::size_t j = (col_center-5ul); j < (col_center+5ul); j++) {
+        std::cout << u1[i*padded_nb_cols+j] << " ";
+      }
+      std::cout << "\n";
+    }
+    std::cout << std::endl;
+    for (std::size_t i = (row_center-5ul) ; i < (row_center+5ul); i++) {
+      for (std::size_t j = (col_center-5ul); j < (col_center+5ul); j++) {
+        std::cout << v1[i*padded_nb_cols+j] << " ";
+      }
+      std::cout << "\n";
+    }
+    std::cout << std::endl;
+
+    // Release device arrays
+    sycl::free(iud,q);
+    sycl::free(ivd,q);
+    sycl::free(oud,q);
+    sycl::free(ovd,q);
+
+  }
+  catch (sycl::exception const & e) {
+    std::cout << e.what() << std::endl;
+    std::cout << e.code().message() << std::endl;
+    return 1;
+  }
+  catch (std::exception const & e) {
+    std::cout << e.what() << std::endl;
+    return 1;
+  }
+  catch (const char * e) {
+    std::cout << e << std::endl;
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/GrayScottIterations/CMakeLists.txt b/GrayScottIterations/CMakeLists.txt
new file mode 100755
index 0000000..36103fa
--- /dev/null
+++ b/GrayScottIterations/CMakeLists.txt
@@ -0,0 +1,12 @@
+cmake_minimum_required (VERSION 3.4)
+
+# Direct CMake to use icpx rather than the default C++ compiler/linker
+set(CMAKE_CXX_COMPILER icpx)
+
+project(GrayScottIterations CXX)
+
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
+
+add_subdirectory (src)
diff --git a/GrayScottIterations/build.bash b/GrayScottIterations/build.bash
new file mode 100755
index 0000000..49344dd
--- /dev/null
+++ b/GrayScottIterations/build.bash
@@ -0,0 +1,3 @@
+#!/bin/bash
+cd build || exit 1
+make all
diff --git a/GrayScottIterations/cmake.bash b/GrayScottIterations/cmake.bash
new file mode 100755
index 0000000..869411d
--- /dev/null
+++ b/GrayScottIterations/cmake.bash
@@ -0,0 +1,5 @@
+#!/bin/bash
+rm -rf build
+mkdir -p build
+cd build || exit 1
+cmake ..
diff --git a/GrayScottIterations/run.bash b/GrayScottIterations/run.bash
new file mode 100755
index 0000000..26a9b8e
--- /dev/null
+++ b/GrayScottIterations/run.bash
@@ -0,0 +1,5 @@
+#!/bin/bash
+time ./build/main.exe 270 480 5 10000
+#time ./build/main.exe 540 960 5 10000
+#time ./build/main.exe 1080 1920 5 10000
+#time ./build/main.exe 2160 3840 5 1000
diff --git a/GrayScottIterations/src/CMakeLists.txt b/GrayScottIterations/src/CMakeLists.txt
new file mode 100755
index 0000000..fe04241
--- /dev/null
+++ b/GrayScottIterations/src/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(SOURCE_FILE main.cpp)
+set(TARGET_NAME main.exe)
+
+set(COMPILE_FLAGS "-fsycl -Wall")
+set(LINK_FLAGS "-fsycl")
+
+add_executable(${TARGET_NAME} ${SOURCE_FILE})
+set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS}")
+set_target_properties(${TARGET_NAME} PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
+#add_custom_target(all DEPENDS ${TARGET_NAME})
diff --git a/GrayScottIterations/src/main.cpp b/GrayScottIterations/src/main.cpp
new file mode 100644
index 0000000..acab7db
--- /dev/null
+++ b/GrayScottIterations/src/main.cpp
@@ -0,0 +1,190 @@
+#include <CL/sycl.hpp>
+#include <array>
+#include <iostream>
+#include <iomanip>
+#include <cmath>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+using namespace cl::sycl;
+
+constexpr float KILL_RATE { 0.062f };
+constexpr float FEED_RATE { 0.03f };
+constexpr float DT { 1.0f };
+
+constexpr float DIFFUSION_RATE_U { 0.1f };
+constexpr float DIFFUSION_RATE_V { 0.05f };
+
+// Run one Gray-Scott time step entirely on the device: read iud/ivd and
+// write the interior cells of oud/ovd. All four pointers are device arrays
+// of (nb_rows+2)*(nb_cols+2) floats. The one-cell border of oud/ovd is never
+// written, so the caller must have initialized it before the first call.
+// Returns once the kernel has completed.
+void submit( queue & q,
+             float const * iud, float const * ivd,
+             float * oud, float * ovd,
+             std::size_t nb_rows, std::size_t nb_cols ) {
+
+  // Submit command group for execution
+  q.submit([&](handler& h) {
+
+    // Define the kernel
+    h.parallel_for(range<2>{nb_rows,nb_cols}, [=](item<2> it) {
+
+      id<2> xy = it.get_id();
+      std::size_t row = xy[0] ;
+      std::size_t col = xy[1] ;
+
+      float u = iud[(row+1)*(nb_cols+2)+col+1];
+      float v = ivd[(row+1)*(nb_cols+2)+col+1];
+      float uvv = u*v*v;
+
+      // Sum of (neighbour - centre) over the 3x3 stencil
+      float full_u = 0.0f;
+      float full_v = 0.0f;
+      for(std::size_t k = 0; k < 3; ++k){
+        for(std::size_t l = 0; l < 3; ++l){
+          full_u += (iud[(row+k)*(nb_cols+2)+col+l] - u);
+          full_v += (ivd[(row+k)*(nb_cols+2)+col+l] - v);
+        }
+      }
+
+      float du = DIFFUSION_RATE_U*full_u - uvv + FEED_RATE*(1.0f - u);
+      float dv = DIFFUSION_RATE_V*full_v + uvv - (FEED_RATE + KILL_RATE)*v;
+
+      oud[(row+1)*(nb_cols+2)+col+1] = u + du*DT;
+      ovd[(row+1)*(nb_cols+2)+col+1] = v + dv*DT;
+    });
+
+  }).wait();
+}
+
+int main( int argc, char * argv[] ) {
+
+  // runtime parameters
+  if (argc != 5) {
+    std::cerr << "Usage: " << argv[0] << " nb_rows nb_cols nb_images nb_iterations" << std::endl;
+    return 1;
+  }
+
+  try {
+
+    std::size_t nb_rows {std::stoul(argv[1])} ;
+    std::size_t nb_cols {std::stoul(argv[2])} ;
+    std::size_t nb_images {std::stoul(argv[3])} ;
+    std::size_t nb_iterations {std::stoul(argv[4])} ;
+    if (nb_iterations % 2 != 0) {
+      throw std::invalid_argument("nb_iterations must be even");
+    }
+
+    // Loop through available platforms and devices
+    for (auto const& this_platform : platform::get_platforms() ) {
+      std::cout << "Found platform: "
+                << this_platform.get_info<info::platform::name>() << std::endl;
+      for (auto const& this_device : this_platform.get_devices() ) {
+        std::cout << " Device: "
+                  << this_device.get_info<info::device::name>() << std::endl;
+      }
+    }
+
+    // Create SYCL queue
+    queue q;
+
+    // Running platform and device
+    std::cout << "Running on platform: "
+              << q.get_device().get_platform().get_info<info::platform::name>() << std::endl;
+    std::cout << " Device: "
+              << q.get_device().get_info<info::device::name>() << std::endl;
+    std::cout << std::endl;
+
+    // Initialize local arrays
+    const std::size_t padded_nb_rows { nb_rows+2 };
+    const std::size_t padded_nb_cols { nb_cols+2 };
+    const std::size_t size { padded_nb_rows*padded_nb_cols };
+    std::vector<float> u1(size, 1.f);
+    std::vector<float> v1(size, 0.f);
+    const std::size_t v_row_begin { (7ul*padded_nb_rows+8ul)/16ul };
+    const std::size_t v_row_end { (9ul*padded_nb_rows+8ul)/16ul };
+    const std::size_t v_col_begin { (7ul*padded_nb_cols+8ul)/16ul };
+    const std::size_t v_col_end { (9ul*padded_nb_cols+8ul)/16ul };
+    std::cout << "v_row_begin: " << v_row_begin << std::endl;
+    std::cout << "v_row_end: " << v_row_end << std::endl;
+    std::cout << "v_col_begin: " << v_col_begin << std::endl;
+    std::cout << "v_col_end: " << v_col_end << std::endl;
+    std::cout << std::endl;
+    for (std::size_t i = v_row_begin; i < v_row_end; i++) {
+      for (std::size_t j = v_col_begin; j < v_col_end; j++) {
+        u1[i*padded_nb_cols+j] = 0.f;
+        v1[i*padded_nb_cols+j] = 1.f;
+      }
+    }
+
+    // Create device arrays
+    float * ud1 = malloc_device<float>(size, q);
+    float * vd1 = malloc_device<float>(size, q);
+    float * ud2 = malloc_device<float>(size, q);
+    float * vd2 = malloc_device<float>(size, q);
+
+    // Copy the initial state to the device once. The second pair of arrays
+    // gets the same content: its interior is overwritten by the first step,
+    // but its border is never written by the kernel and must be valid.
+    const std::size_t nb_bytes { size*sizeof(float) };
+    q.memcpy(ud1,u1.data(),nb_bytes).wait();
+    q.memcpy(vd1,v1.data(),nb_bytes).wait();
+    q.memcpy(ud2,u1.data(),nb_bytes).wait();
+    q.memcpy(vd2,v1.data(),nb_bytes).wait();
+
+    // iterations: the data stays on the device between two images
+    for ( std::size_t image = 0 ; image < nb_images ; ++image ) {
+      for ( std::size_t iter = 0 ; iter < nb_iterations ; iter += 2 ) {
+        submit( q, ud1, vd1, ud2, vd2, nb_rows, nb_cols );
+        submit( q, ud2, vd2, ud1, vd1, nb_rows, nb_cols );
+      }
+      // Bring the current image back to the host
+      q.memcpy(u1.data(),ud1,nb_bytes).wait();
+      q.memcpy(v1.data(),vd1,nb_bytes).wait();
+    }
+
+    // Print some result
+    const std::size_t row_center { padded_nb_rows/2ul };
+    const std::size_t col_center { padded_nb_cols/2ul };
+    std::cout<<std::fixed<<std::setprecision(2) ;
+    for (std::size_t i = (row_center-5ul) ; i < (row_center+5ul); i++) {
+      for (std::size_t j = (col_center-5ul); j < (col_center+5ul); j++) {
+        std::cout << u1[i*padded_nb_cols+j] << " ";
+      }
+      std::cout << "\n";
+    }
+    std::cout << std::endl;
+    for (std::size_t i = (row_center-5ul) ; i < (row_center+5ul); i++) {
+      for (std::size_t j = (col_center-5ul); j < (col_center+5ul); j++) {
+        std::cout << v1[i*padded_nb_cols+j] << " ";
+      }
+      std::cout << "\n";
+    }
+    std::cout << std::endl;
+
+    // Release device arrays
+    sycl::free(ud1,q);
+    sycl::free(vd1,q);
+    sycl::free(ud2,q);
+    sycl::free(vd2,q);
+
+  }
+  catch (sycl::exception const & e) {
+    std::cout << e.what() << std::endl;
+    std::cout << e.code().message() << std::endl;
+    return 1;
+  }
+  catch (std::exception const & e) {
+    std::cout << e.what() << std::endl;
+    return 1;
+  }
+  catch (const char * e) {
+    std::cout << e << std::endl;
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/GrayScottShared/CMakeLists.txt b/GrayScottShared/CMakeLists.txt
new file mode 100755
index 0000000..36103fa
--- /dev/null
+++ b/GrayScottShared/CMakeLists.txt
@@ -0,0 +1,12 @@
+cmake_minimum_required (VERSION 3.4)
+
+# Direct CMake to use icpx rather than the default C++ compiler/linker
+set(CMAKE_CXX_COMPILER icpx)
+
+project(GrayScottShared CXX)
+
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
+
+add_subdirectory (src)
diff --git a/GrayScottShared/build.bash b/GrayScottShared/build.bash
new file mode 100755
index 0000000..49344dd
--- /dev/null
+++ b/GrayScottShared/build.bash
@@ -0,0 +1,3 @@
+#!/bin/bash
+cd build || exit 1
+make all
diff --git a/GrayScottShared/cmake.bash b/GrayScottShared/cmake.bash
new file mode 100755
index 0000000..869411d
--- /dev/null
+++ b/GrayScottShared/cmake.bash
@@ -0,0 +1,5 @@
+#!/bin/bash
+rm -rf build
+mkdir -p build
+cd build || exit 1
+cmake ..
diff --git a/GrayScottShared/run.bash b/GrayScottShared/run.bash
new file mode 100755
index 0000000..26a9b8e
--- /dev/null
+++ b/GrayScottShared/run.bash
@@ -0,0 +1,5 @@
+#!/bin/bash
+time ./build/main.exe 270 480 5 10000
+#time ./build/main.exe 540 960 5 10000
+#time ./build/main.exe 1080 1920 5 10000
+#time ./build/main.exe 2160 3840 5 1000
diff --git a/GrayScottShared/src/CMakeLists.txt b/GrayScottShared/src/CMakeLists.txt
new file mode 100755
index 0000000..fe04241
--- /dev/null
+++ b/GrayScottShared/src/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(SOURCE_FILE main.cpp)
+set(TARGET_NAME main.exe)
+
+set(COMPILE_FLAGS "-fsycl -Wall")
+set(LINK_FLAGS "-fsycl")
+
+add_executable(${TARGET_NAME} ${SOURCE_FILE})
+set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS}")
+set_target_properties(${TARGET_NAME} PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
+#add_custom_target(all DEPENDS ${TARGET_NAME})
diff --git a/GrayScottShared/src/main.cpp b/GrayScottShared/src/main.cpp
new file mode 100644
index 0000000..6b21005
--- /dev/null
+++ b/GrayScottShared/src/main.cpp
@@ -0,0 +1,183 @@
+#include <CL/sycl.hpp>
+#include <array>
+#include <iostream>
+#include <iomanip>
+#include <cmath>
+#include <stdexcept>
+#include <string>
+
+using namespace cl::sycl;
+
+constexpr float KILL_RATE { 0.062f };
+constexpr float FEED_RATE { 0.03f };
+constexpr float DT { 1.0f };
+
+constexpr float DIFFUSION_RATE_U { 0.1f };
+constexpr float DIFFUSION_RATE_V { 0.05f };
+
+// Run one Gray-Scott time step: read iu/iv and write the interior cells of
+// ou/ov. All four pointers must be usable inside a kernel (main passes USM
+// shared allocations) and hold (nb_rows+2)*(nb_cols+2) floats. The one-cell
+// border of ou/ov is never written. Returns once the kernel has completed.
+void submit( queue & q,
+             float const * iu, float const * iv,
+             float * ou, float * ov,
+             std::size_t nb_rows, std::size_t nb_cols ) {
+
+  // Submit command group for execution
+  q.submit([&](handler& h) {
+
+    // Define the kernel
+    h.parallel_for(range<2>{nb_rows,nb_cols}, [=](item<2> it) {
+
+      id<2> xy = it.get_id();
+      std::size_t row = xy[0] ;
+      std::size_t col = xy[1] ;
+
+      float u = iu[(row+1)*(nb_cols+2)+col+1];
+      float v = iv[(row+1)*(nb_cols+2)+col+1];
+      float uvv = u*v*v;
+
+      // Sum of (neighbour - centre) over the 3x3 stencil
+      float full_u = 0.0f;
+      float full_v = 0.0f;
+      for(std::size_t k = 0; k < 3; ++k){
+        for(std::size_t l = 0; l < 3; ++l){
+          full_u += (iu[(row+k)*(nb_cols+2)+col+l] - u);
+          full_v += (iv[(row+k)*(nb_cols+2)+col+l] - v);
+        }
+      }
+
+      float du = DIFFUSION_RATE_U*full_u - uvv + FEED_RATE*(1.0f - u);
+      float dv = DIFFUSION_RATE_V*full_v + uvv - (FEED_RATE + KILL_RATE)*v;
+
+      ou[(row+1)*(nb_cols+2)+col+1] = u + du*DT;
+      ov[(row+1)*(nb_cols+2)+col+1] = v + dv*DT;
+    });
+
+  });
+
+  // Wait for the command group to finish
+  q.wait();
+}
+
+int main( int argc, char * argv[] ) {
+
+  // runtime parameters
+  if (argc != 5) {
+    std::cerr << "Usage: " << argv[0] << " nb_rows nb_cols nb_images nb_iterations" << std::endl;
+    return 1;
+  }
+
+  try {
+
+    std::size_t nb_rows {std::stoul(argv[1])} ;
+    std::size_t nb_cols {std::stoul(argv[2])} ;
+    std::size_t nb_images {std::stoul(argv[3])} ;
+    std::size_t nb_iterations {std::stoul(argv[4])} ;
+    if (nb_iterations % 2 != 0) {
+      throw std::invalid_argument("nb_iterations must be even");
+    }
+
+    // Loop through available platforms and devices
+    for (auto const& this_platform : platform::get_platforms() ) {
+      std::cout << "Found platform: "
+                << this_platform.get_info<info::platform::name>() << std::endl;
+      for (auto const& this_device : this_platform.get_devices() ) {
+        std::cout << " Device: "
+                  << this_device.get_info<info::device::name>() << std::endl;
+      }
+    }
+
+    // Create SYCL queue
+    queue q;
+
+    // Running platform and device
+    std::cout << "Running on platform: "
+              << q.get_device().get_platform().get_info<info::platform::name>() << std::endl;
+    std::cout << " Device: "
+              << q.get_device().get_info<info::device::name>() << std::endl;
+    std::cout << std::endl;
+
+    // Initialize input array
+    const std::size_t padded_nb_rows { nb_rows+2 };
+    const std::size_t padded_nb_cols { nb_cols+2 };
+    const std::size_t size { padded_nb_rows*padded_nb_cols };
+    float * u1 = malloc_shared<float>(size, q);
+    float * u2 = malloc_shared<float>(size, q);
+    float * v1 = malloc_shared<float>(size, q);
+    float * v2 = malloc_shared<float>(size, q);
+    for (std::size_t i = 0; i < padded_nb_rows; i++) {
+      for (std::size_t j = 0; j < padded_nb_cols; j++) {
+        u1[i*padded_nb_cols+j] = 1.f;
+        v1[i*padded_nb_cols+j] = 0.f;
+        u2[i*padded_nb_cols+j] = 1.f;
+        v2[i*padded_nb_cols+j] = 0.f;
+      }
+    }
+    const std::size_t v_row_begin { (7ul*padded_nb_rows+8ul)/16ul };
+    const std::size_t v_row_end { (9ul*padded_nb_rows+8ul)/16ul };
+    const std::size_t v_col_begin { (7ul*padded_nb_cols+8ul)/16ul };
+    const std::size_t v_col_end { (9ul*padded_nb_cols+8ul)/16ul };
+    std::cout << "v_row_begin: " << v_row_begin << std::endl;
+    std::cout << "v_row_end: " << v_row_end << std::endl;
+    std::cout << "v_col_begin: " << v_col_begin << std::endl;
+    std::cout << "v_col_end: " << v_col_end << std::endl;
+    std::cout << std::endl;
+    for (std::size_t i = v_row_begin; i < v_row_end; i++) {
+      for (std::size_t j = v_col_begin; j < v_col_end; j++) {
+        u1[i*padded_nb_cols+j] = 0.f;
+        v1[i*padded_nb_cols+j] = 1.f;
+      }
+    }
+
+    // iterations
+    for ( std::size_t image = 0 ; image < nb_images ; ++image ) {
+      for ( std::size_t iter = 0 ; iter < nb_iterations ; iter += 2 ) {
+        submit( q, u1, v1, u2, v2, nb_rows, nb_cols );
+        submit( q, u2, v2, u1, v1, nb_rows, nb_cols );
+      }
+    }
+
+    // Print some result
+    const std::size_t row_center { padded_nb_rows/2ul };
+    const std::size_t col_center { padded_nb_cols/2ul };
+    std::cout<<std::fixed<<std::setprecision(2) ;
+    for (std::size_t i = (row_center-5ul) ; i < (row_center+5ul); i++) {
+      for (std::size_t j = (col_center-5ul); j < (col_center+5ul); j++) {
+        std::cout << u1[i*padded_nb_cols+j] << " ";
+      }
+      std::cout << "\n";
+    }
+    std::cout << std::endl;
+    for (std::size_t i = (row_center-5ul) ; i < (row_center+5ul); i++) {
+      for (std::size_t j = (col_center-5ul); j < (col_center+5ul); j++) {
+        std::cout << v1[i*padded_nb_cols+j] << " ";
+      }
+      std::cout << "\n";
+    }
+    std::cout << std::endl;
+
+    // Release shared arrays
+    sycl::free(u1,q);
+    sycl::free(u2,q);
+    sycl::free(v1,q);
+    sycl::free(v2,q);
+
+  }
+  catch (sycl::exception const & e) {
+    std::cout << e.what() << std::endl;
+    std::cout << e.code().message() << std::endl;
+    return 1;
+  }
+  catch (std::exception const & e) {
+    std::cout << e.what() << std::endl;
+    return 1;
+  }
+  catch (const char * e) {
+    std::cout << e << std::endl;
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/SquareDevice/src/main.cpp b/SquareDevice/src/main.cpp
index 4e4ccee..7f1beae 100644
--- a/SquareDevice/src/main.cpp
+++ b/SquareDevice/src/main.cpp
@@ -24,21 +24,22 @@ int main() {
               << q.get_device().get_info<info::device::name>() << "\n";
     std::cout << "\n";
 
-    std::array<float,SIZE> input, output ;
-    float * binput = sycl::malloc_device<float>(SIZE, q);
-    float * boutput = sycl::malloc_device<float>(SIZE, q);
-
     // Initialize input array
+    std::array<float,SIZE> input, output ;
     for (std::size_t i = 0; i < SIZE; i++) {
         input[i] = i + 1;
     }
 
+    // Alloc memory on device
+    float * dinput = malloc_device<float>(SIZE, q);
+    float * doutput = malloc_device<float>(SIZE, q);
+
     // Submit command group for execution
-    q.memcpy(binput,input.data(),SIZE*sizeof(float)).wait();
+    q.memcpy(dinput,input.data(),SIZE*sizeof(float)).wait();
     q.parallel_for(SIZE, [=](id<1> idx) {
-        boutput[idx] = binput[idx] * binput[idx];
+        doutput[idx] = dinput[idx] * dinput[idx];
     }).wait();
-    q.memcpy(output.data(),boutput,SIZE*sizeof(float)).wait();
+    q.memcpy(output.data(),doutput,SIZE*sizeof(float)).wait();
 
     // Print the result
     for (int i = 0; i < SIZE; i++) {
@@ -47,8 +48,8 @@ int main() {
     std::cout << std::endl;
 
     // Release resources
-    sycl::free(binput, q);
-    sycl::free(boutput, q);
+    sycl::free(dinput, q);
+    sycl::free(doutput, q);
 
     return 0;
 }
diff --git a/SquareShared/src/main.cpp b/SquareShared/src/main.cpp
index ff77c3d..a4882ae 100644
--- a/SquareShared/src/main.cpp
+++ b/SquareShared/src/main.cpp
@@ -24,8 +24,8 @@ int main() {
               << q.get_device().get_info<info::device::name>() << "\n";
     std::cout << "\n";
 
-    auto * input = sycl::malloc_shared<float>(SIZE, q);
-    auto * output = sycl::malloc_shared<float>(SIZE, q);
+    auto * input = malloc_shared<float>(SIZE, q);
+    auto * output = malloc_shared<float>(SIZE, q);
 
     // Initialize input array
     for (std::size_t i = 0; i < SIZE; i++) {
-- 
GitLab