diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 95f84e9..2e54201 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -225,4 +225,15 @@ jobs: run: ./src/ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_24_BIN }} - name: Test compile hipsycl @ CMake 3.24 if: ${{ ! cancelled() }} - run: ./src/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_24_BIN }} \ No newline at end of file + run: ./src/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_24_BIN }} + + test-futhark: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - name: Prepare Futhark compiler + uses: diku-dk/install-futhark@HEAD + with: + version: 'latest' + - run: cmake -Bbuild -H. -DMODEL=futhark -DFUTHARK_BACKEND=multicore + - run: cmake --build build diff --git a/CMakeLists.txt b/CMakeLists.txt index e89ada4..7551dc3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") cmake_policy(SET CMP0135 NEW) endif () -project(BabelStream VERSION 4.0 LANGUAGES CXX) +project(BabelStream VERSION 4.0 LANGUAGES CXX C) # uncomment for debugging build issues: #set(CMAKE_VERBOSE_MAKEFILE ON) @@ -162,6 +162,7 @@ register_model(acc ACC ACCStream.cpp) register_model(raja USE_RAJA RAJAStream.cpp) register_model(tbb TBB TBBStream.cpp) register_model(thrust THRUST ThrustStream.cu) # Thrust uses cu, even for rocThrust +register_model(futhark FUTHARK FutharkStream.cpp) set(USAGE ON CACHE BOOL "Whether to print all custom flags for the selected model") diff --git a/README.md b/README.md index 9c185b6..487f8e9 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ BabelStream is currently implemented in the following parallel programming model - SYCL and SYCL2020 (USM and accessors) - TBB - Thrust (via CUDA or HIP) +- Futhark This project also contains implementations in alternative languages with different build systems: * Julia - [JuliaStream.jl](./src/julia/JuliaStream.jl) @@ -101,7 
+102,7 @@ The source for each model's implementations are located in `./src/`. Currently available models are: ``` -omp;ocl;std-data;std-indices;std-ranges;hip;cuda;kokkos;sycl;sycl2020;acc;raja;tbb;thrust +omp;ocl;std-data;std-indices;std-ranges;hip;cuda;kokkos;sycl;sycl2020;acc;raja;tbb;thrust;futhark ``` #### Overriding default flags
diff --git a/src/futhark/FutharkStream.cpp b/src/futhark/FutharkStream.cpp
new file mode 100644
index 0000000..ebd3633
--- /dev/null
+++ b/src/futhark/FutharkStream.cpp
@@ -0,0 +1,207 @@
+// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith,
+// University of Bristol HPC
+// Copyright (c) 2022 Troels Henriksen
+// University of Copenhagen
+//
+// For full license terms please see the LICENSE file distributed with this
+// source code
+
+#include <cstdlib> // For aligned_alloc
+#include <string>
+#include "FutharkStream.h"
+
+// Creates the Futhark configuration and context for the given device index.
+// The device arrays stay null until init_arrays() creates them.
+template <class T>
+FutharkStream<T>::FutharkStream(const int ARRAY_SIZE, int device)
+{
+  this->array_size = ARRAY_SIZE;
+  this->cfg = futhark_context_config_new();
+  // Device index rendered as "#<index>"; only passed on for the CUDA/OpenCL backends.
+  this->device = "#" + std::to_string(device);
+#if defined(FUTHARK_BACKEND_cuda) || defined(FUTHARK_BACKEND_opencl)
+  futhark_context_config_set_device(cfg, this->device.c_str());
+#endif
+  this->ctx = futhark_context_new(cfg);
+  this->a = nullptr;
+  this->b = nullptr;
+  this->c = nullptr;
+}
+
+// Frees whichever device arrays were created, then the context and its configuration.
+template <>
+FutharkStream<float>::~FutharkStream()
+{
+  if (this->a) {
+    futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->a);
+  }
+  if (this->b) {
+    futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->b);
+  }
+  if (this->c) {
+    futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->c);
+  }
+  futhark_context_free(this->ctx);
+  futhark_context_config_free(this->cfg);
+}
+
+// Frees whichever device arrays were created, then the context and its configuration.
+template <>
+FutharkStream<double>::~FutharkStream()
+{
+  if (this->a) {
+    futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->a);
+  }
+  if (this->b) {
+    futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->b);
+  }
+  if (this->c) {
+    futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->c);
+  }
+  futhark_context_free(this->ctx);
+  futhark_context_config_free(this->cfg);
+}
+
+template <>
+void FutharkStream<float>::init_arrays(float initA, float initB, float initC) {
+  const int n = this->array_size;
+  // Host staging buffers; std::vector releases them on every exit path.
+  const std::vector<float> host_a(n, initA);
+  const std::vector<float> host_b(n, initB);
+  const std::vector<float> host_c(n, initC);
+  this->a = futhark_new_f32_1d(this->ctx, host_a.data(), n);
+  this->b = futhark_new_f32_1d(this->ctx, host_b.data(), n);
+  this->c = futhark_new_f32_1d(this->ctx, host_c.data(), n);
+  // Synchronise before the staging buffers go out of scope.
+  futhark_context_sync(this->ctx);
+}
+
+template <>
+void FutharkStream<double>::init_arrays(double initA, double initB, double initC) {
+  const int n = this->array_size;
+  // Host staging buffers; std::vector releases them on every exit path.
+  const std::vector<double> host_a(n, initA);
+  const std::vector<double> host_b(n, initB);
+  const std::vector<double> host_c(n, initC);
+  this->a = futhark_new_f64_1d(this->ctx, host_a.data(), n);
+  this->b = futhark_new_f64_1d(this->ctx, host_b.data(), n);
+  this->c = futhark_new_f64_1d(this->ctx, host_c.data(), n);
+  // Synchronise before the staging buffers go out of scope.
+  futhark_context_sync(this->ctx);
+}
+
+// Copies the three device arrays into the caller's host vectors.
+template <>
+void FutharkStream<float>::read_arrays(std::vector<float>& h_a, std::vector<float>& h_b, std::vector<float>& h_c) {
+  futhark_values_f32_1d(this->ctx, (futhark_f32_1d*)this->a, h_a.data());
+  futhark_values_f32_1d(this->ctx, (futhark_f32_1d*)this->b, h_b.data());
+  futhark_values_f32_1d(this->ctx, (futhark_f32_1d*)this->c, h_c.data());
+  futhark_context_sync(this->ctx);
+}
+
+// Copies the three device arrays into the caller's host vectors.
+template <>
+void FutharkStream<double>::read_arrays(std::vector<double>& h_a, std::vector<double>& h_b, std::vector<double>& h_c) {
+  futhark_values_f64_1d(this->ctx, (futhark_f64_1d*)this->a, h_a.data());
+  futhark_values_f64_1d(this->ctx, (futhark_f64_1d*)this->b, h_b.data());
+  futhark_values_f64_1d(this->ctx, (futhark_f64_1d*)this->c, 
h_c.data());
+  futhark_context_sync(this->ctx);
+}
+
+template <>
+void FutharkStream<float>::copy() { // c = a
+  futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->c);
+  futhark_entry_f32_copy(this->ctx, (futhark_f32_1d**)&this->c, (futhark_f32_1d*)this->a);
+  futhark_context_sync(this->ctx);
+}
+
+template <>
+void FutharkStream<double>::copy() { // c = a
+  futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->c);
+  futhark_entry_f64_copy(this->ctx, (futhark_f64_1d**)&this->c, (futhark_f64_1d*)this->a);
+  futhark_context_sync(this->ctx);
+}
+
+template <>
+void FutharkStream<float>::mul() { // b = scalar * c
+  futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->b);
+  futhark_entry_f32_mul(this->ctx, (futhark_f32_1d**)&this->b, (futhark_f32_1d*)this->c);
+  futhark_context_sync(this->ctx);
+}
+
+template <>
+void FutharkStream<double>::mul() { // b = scalar * c
+  futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->b);
+  futhark_entry_f64_mul(this->ctx, (futhark_f64_1d**)&this->b, (futhark_f64_1d*)this->c);
+  futhark_context_sync(this->ctx);
+}
+
+template <>
+void FutharkStream<float>::add() { // c = a + b
+  futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->c);
+  futhark_entry_f32_add(this->ctx, (futhark_f32_1d**)&this->c, (futhark_f32_1d*)this->a, (futhark_f32_1d*)this->b);
+  futhark_context_sync(this->ctx);
+}
+
+template <>
+void FutharkStream<double>::add() { // c = a + b
+  futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->c);
+  futhark_entry_f64_add(this->ctx, (futhark_f64_1d**)&this->c, (futhark_f64_1d*)this->a, (futhark_f64_1d*)this->b);
+  futhark_context_sync(this->ctx);
+}
+
+template <>
+void FutharkStream<float>::triad() { // a = b + scalar * c (same operands as the double version)
+  futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->a);
+  futhark_entry_f32_triad(this->ctx, (futhark_f32_1d**)&this->a, (futhark_f32_1d*)this->b, (futhark_f32_1d*)this->c);
+  futhark_context_sync(this->ctx);
+}
+
+template <>
+void FutharkStream<double>::triad() { // a = b + scalar * c
+  futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->a);
+  futhark_entry_f64_triad(this->ctx, (futhark_f64_1d**)&this->a, (futhark_f64_1d*)this->b, (futhark_f64_1d*)this->c);
+  futhark_context_sync(this->ctx);
+}
+
+template <>
+void FutharkStream<float>::nstream() { // a = a + b + scalar * c
+  futhark_f32_1d* d; // result handle: 'a' is also an input, so it is freed only after the call
+  futhark_entry_f32_nstream(this->ctx, &d, (futhark_f32_1d*)this->a, (futhark_f32_1d*)this->b, (futhark_f32_1d*)this->c);
+  futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->a);
+  this->a = d;
+  futhark_context_sync(this->ctx);
+}
+
+template <>
+void FutharkStream<double>::nstream() { // a = a + b + scalar * c
+  futhark_f64_1d* d; // result handle: 'a' is also an input, so it is freed only after the call
+  futhark_entry_f64_nstream(this->ctx, &d, (futhark_f64_1d*)this->a, (futhark_f64_1d*)this->b, (futhark_f64_1d*)this->c);
+  futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->a);
+  this->a = d;
+  futhark_context_sync(this->ctx);
+}
+
+template <>
+float FutharkStream<float>::dot() { // returns the dot product of a and b
+  float res;
+  futhark_entry_f32_dot(this->ctx, &res, (futhark_f32_1d*)this->a, (futhark_f32_1d*)this->b);
+  futhark_context_sync(this->ctx);
+  return res;
+}
+
+template <>
+double FutharkStream<double>::dot() { // returns the dot product of a and b
+  double res;
+  futhark_entry_f64_dot(this->ctx, &res, (futhark_f64_1d*)this->a, (futhark_f64_1d*)this->b);
+  futhark_context_sync(this->ctx);
+  return res;
+}
+
+void listDevices(void)
+{
+  std::cout << "Device selection not supported." 
<< std::endl;
+}
+
+template class FutharkStream<float>;
+template class FutharkStream<double>;
diff --git a/src/futhark/FutharkStream.h b/src/futhark/FutharkStream.h
new file mode 100644
index 0000000..6290e79
--- /dev/null
+++ b/src/futhark/FutharkStream.h
@@ -0,0 +1,62 @@
+// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith,
+// University of Bristol HPC
+// Copyright (c) 2022 Troels Henriksen
+// University of Copenhagen
+//
+// For full license terms please see the LICENSE file distributed with this
+// source code
+
+#pragma once
+
+#include
+#include
+
+#include "Stream.h"
+#include "babelstream.h"
+
+// Human-readable model name, chosen by whichever FUTHARK_BACKEND_* macro is defined.
+#if defined(FUTHARK_BACKEND_c)
+#define IMPLEMENTATION_STRING "Futhark (sequential)"
+#elif defined(FUTHARK_BACKEND_multicore)
+#define IMPLEMENTATION_STRING "Futhark (parallel CPU)"
+#elif defined(FUTHARK_BACKEND_opencl)
+#define IMPLEMENTATION_STRING "Futhark (OpenCL)"
+#elif defined(FUTHARK_BACKEND_cuda)
+#define IMPLEMENTATION_STRING "Futhark (CUDA)"
+#else
+#define IMPLEMENTATION_STRING "Futhark (unknown backend)"
+#endif
+
+// Stream implementation that keeps its three arrays as Futhark array handles.
+template <class T>
+class FutharkStream : public Stream<T>
+{
+protected:
+  // Size of arrays
+  int array_size;
+  // For device selection.
+  std::string device;
+
+  // Futhark stuff
+  struct futhark_context_config *cfg;
+  struct futhark_context *ctx;
+
+  // Device side arrays
+  void* a;
+  void* b;
+  void* c;
+
+public:
+  FutharkStream(const int, int);
+  ~FutharkStream();
+
+  virtual void copy() override;
+  virtual void add() override;
+  virtual void mul() override;
+  virtual void triad() override;
+  virtual void nstream() override;
+  virtual T dot() override;
+
+  virtual void init_arrays(T initA, T initB, T initC) override;
+  virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
+};
diff --git a/src/futhark/babelstream.fut b/src/futhark/babelstream.fut
new file mode 100644
index 0000000..d513a60
--- /dev/null
+++ b/src/futhark/babelstream.fut
@@ -0,0 +1,62 @@
+module type kernels = {
+  type t
+  val copy [n] : [n]t -> *[n]t
+  val mul [n] : t -> [n]t -> [n]t
+  val add [n] : [n]t -> [n]t -> [n]t
+  val triad [n] : t -> [n]t -> [n]t -> [n]t
+  val dot [n] : [n]t -> [n]t -> t
+  -- Uniqueness allows nstream to mutate the 'a' array. 
+  val nstream [n] : t -> *[n]t -> [n]t -> [n]t -> [n]t
+}
+
+module kernels (P: real) : kernels with t = P.t = {
+  type t = P.t
+  def copy = copy
+  def mul scalar c = map (P.*scalar) c
+  def add = map2 (P.+)
+  def triad scalar b c = map2 (P.+) b (map (P.* scalar) c)
+  def dot a b = reduce (P.+) (P.i32 0) (map2 (P.*) a b)
+  def nstream scalar a b c = map2 (P.+) a (map2 (P.+) b (map (P.*scalar) c))
+}
+
+module f32_kernels = kernels f32
+def f32_start_scalar : f32 = 0.4
+entry f32_copy = f32_kernels.copy
+entry f32_mul = f32_kernels.mul f32_start_scalar
+entry f32_add = f32_kernels.add
+entry f32_triad = f32_kernels.triad f32_start_scalar
+entry f32_nstream = f32_kernels.nstream f32_start_scalar
+entry f32_dot = f32_kernels.dot
+
+module f64_kernels = kernels f64
+def f64_start_scalar : f64 = 0.4
+entry f64_copy = f64_kernels.copy
+entry f64_mul = f64_kernels.mul f64_start_scalar
+entry f64_add = f64_kernels.add
+entry f64_triad = f64_kernels.triad f64_start_scalar
+entry f64_nstream = f64_kernels.nstream f64_start_scalar
+entry f64_dot = f64_kernels.dot
+
+-- ==
+-- entry: f32_copy f32_mul
+-- random input { [33554432]f32 }
+
+-- ==
+-- entry: f32_add f32_dot f32_triad
+-- random input { [33554432]f32 [33554432]f32 }
+
+-- ==
+-- entry: f32_nstream
+-- random input { [33554432]f32 [33554432]f32 [33554432]f32 }
+
+-- ==
+-- entry: f64_copy f64_mul
+-- random input { [33554432]f64 }
+
+-- ==
+-- entry: f64_add f64_dot f64_triad
+-- random input { [33554432]f64 [33554432]f64 }
+
+-- ==
+-- entry: f64_nstream
+-- random input { [33554432]f64 [33554432]f64 [33554432]f64 }
diff --git a/src/futhark/model.cmake b/src/futhark/model.cmake
new file mode 100644
index 0000000..edd21fa
--- /dev/null
+++ b/src/futhark/model.cmake
@@ -0,0 +1,57 @@
+# Use
+#
+# cmake -Bbuild -H. -DMODEL=futhark -DFUTHARK_BACKEND=foo -DFUTHARK_COMPILER=foo/bar/bin/futhark
+#
+# to use the Futhark backend, where 'foo' must be one of 'multicore',
+# 'c', 'opencl', or 'cuda'. Defaults to 'multicore'.
+#
+# Use -DFUTHARK_COMPILER to set the path to the Futhark compiler
+# binary. Defaults to 'futhark' on the PATH.
+
+register_flag_optional(FUTHARK_BACKEND
+        "Use a specific Futhark backend, possible options are:
+          - c
+          - multicore
+          - opencl
+          - cuda"
+        "multicore")
+
+register_flag_optional(FUTHARK_COMPILER
+        "Absolute path to the Futhark compiler, defaults to the futhark compiler on PATH"
+        "futhark")
+
+macro(setup)
+    # Generate babelstream.c/.h in the build directory from the Futhark source.
+    add_custom_command(
+        OUTPUT
+        ${CMAKE_CURRENT_BINARY_DIR}/babelstream.c
+        ${CMAKE_CURRENT_BINARY_DIR}/babelstream.h
+        COMMAND ${FUTHARK_COMPILER} ${FUTHARK_BACKEND}
+        --library src/futhark/babelstream.fut
+        -o ${CMAKE_CURRENT_BINARY_DIR}/babelstream
+        DEPENDS src/futhark/babelstream.fut
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        VERBATIM
+    )
+    # Quoted so an empty value fails in the else() branch instead of as a syntax error.
+    if ("${FUTHARK_BACKEND}" STREQUAL "c")
+        # Nothing to do.
+    elseif ("${FUTHARK_BACKEND}" STREQUAL "multicore")
+        set(THREADS_PREFER_PTHREAD_FLAG ON)
+        find_package(Threads REQUIRED)
+        register_link_library(Threads::Threads)
+    elseif ("${FUTHARK_BACKEND}" STREQUAL "opencl")
+        find_package(OpenCL REQUIRED)
+        register_link_library(OpenCL::OpenCL)
+    elseif ("${FUTHARK_BACKEND}" STREQUAL "cuda")
+        find_package(CUDA REQUIRED)
+        register_link_library("nvrtc" "cuda" "cudart")
+    else ()
+        message(FATAL_ERROR "Unsupported Futhark backend: ${FUTHARK_BACKEND}")
+    endif()
+endmacro()
+
+macro(setup_target)
+    target_sources(${EXE_NAME} PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/babelstream.c")
+    include_directories("${CMAKE_CURRENT_BINARY_DIR}")
+endmacro()
diff --git a/src/main.cpp b/src/main.cpp index d7208da..d946d77 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -49,6 +49,8 @@ #include "SYCLStream2020.h" #elif defined(OMP) #include "OMPStream.h" +#elif defined(FUTHARK) +#include "FutharkStream.h" #endif // Default size of 2^25 @@ -298,6 +300,10 @@ void run() // Use the OpenMP implementation stream = new OMPStream(ARRAY_SIZE, deviceIndex); +#elif defined(FUTHARK) + // Use the Futhark implementation + stream = new 
FutharkStream(ARRAY_SIZE, deviceIndex); + #endif stream->init_arrays(startA, startB, startC);