Add Futhark implementation (#146)

* Add Futhark.
This commit is contained in:
Troels Henriksen 2023-10-03 14:57:20 +02:00 committed by GitHub
parent 2e3ebeecab
commit 92fed7082b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 411 additions and 3 deletions

View File

@ -225,4 +225,15 @@ jobs:
run: ./src/ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_24_BIN }} run: ./src/ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_24_BIN }}
- name: Test compile hipsycl @ CMake 3.24 - name: Test compile hipsycl @ CMake 3.24
if: ${{ ! cancelled() }} if: ${{ ! cancelled() }}
run: ./src/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_24_BIN }} run: ./src/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_24_BIN }}
test-futhark:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- name: Prepare Futhark compiler
uses: diku-dk/install-futhark@HEAD
with:
version: 'latest'
- run: cmake -Bbuild -H. -DMODEL=futhark -DFUTHARK_BACKEND=multicore
- run: cmake --build build

View File

@ -4,7 +4,7 @@ if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0")
cmake_policy(SET CMP0135 NEW) cmake_policy(SET CMP0135 NEW)
endif () endif ()
project(BabelStream VERSION 4.0 LANGUAGES CXX) project(BabelStream VERSION 4.0 LANGUAGES CXX C)
# uncomment for debugging build issues: # uncomment for debugging build issues:
#set(CMAKE_VERBOSE_MAKEFILE ON) #set(CMAKE_VERBOSE_MAKEFILE ON)
@ -162,6 +162,7 @@ register_model(acc ACC ACCStream.cpp)
register_model(raja USE_RAJA RAJAStream.cpp) register_model(raja USE_RAJA RAJAStream.cpp)
register_model(tbb TBB TBBStream.cpp) register_model(tbb TBB TBBStream.cpp)
register_model(thrust THRUST ThrustStream.cu) # Thrust uses cu, even for rocThrust register_model(thrust THRUST ThrustStream.cu) # Thrust uses cu, even for rocThrust
register_model(futhark FUTHARK FutharkStream.cpp)
set(USAGE ON CACHE BOOL "Whether to print all custom flags for the selected model") set(USAGE ON CACHE BOOL "Whether to print all custom flags for the selected model")

View File

@ -41,6 +41,7 @@ BabelStream is currently implemented in the following parallel programming model
- SYCL and SYCL2020 (USM and accessors) - SYCL and SYCL2020 (USM and accessors)
- TBB - TBB
- Thrust (via CUDA or HIP) - Thrust (via CUDA or HIP)
- Futhark
This project also contains implementations in alternative languages with different build systems: This project also contains implementations in alternative languages with different build systems:
* Julia - [JuliaStream.jl](./src/julia/JuliaStream.jl) * Julia - [JuliaStream.jl](./src/julia/JuliaStream.jl)
@ -101,7 +102,7 @@ The source for each model's implementations are located in `./src/<model>`.
Currently available models are: Currently available models are:
``` ```
omp;ocl;std-data;std-indices;std-ranges;hip;cuda;kokkos;sycl;sycl2020;acc;raja;tbb;thrust omp;ocl;std-data;std-indices;std-ranges;hip;cuda;kokkos;sycl;sycl2020;acc;raja;tbb;thrust;futhark
``` ```
#### Overriding default flags #### Overriding default flags

View File

@ -0,0 +1,212 @@
// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith,
// University of Bristol HPC
// Copyright (c) 2022 Troels Henriksen
// University of Copenhagen
//
// For full license terms please see the LICENSE file distributed with this
// source code
#include <cstdlib> // For aligned_alloc
#include <string>
#include "FutharkStream.h"
template <class T>
FutharkStream<T>::FutharkStream(const int ARRAY_SIZE, int device)
{
this->array_size = ARRAY_SIZE;
this->cfg = futhark_context_config_new();
this->device = "#" + std::to_string(device);
#if defined(FUTHARK_BACKEND_cuda) || defined(FUTHARK_BACKEND_opencl)
futhark_context_config_set_device(cfg, this->device.c_str());
#endif
this->ctx = futhark_context_new(cfg);
this->a = NULL;
this->b = NULL;
this->c = NULL;
}
template <>
FutharkStream<float>::~FutharkStream()
{
if (this->a) {
futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->a);
}
if (this->b) {
futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->b);
}
if (this->c) {
futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->c);
}
futhark_context_free(this->ctx);
futhark_context_config_free(this->cfg);
}
template <>
FutharkStream<double>::~FutharkStream()
{
if (this->a) {
futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->a);
}
if (this->b) {
futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->b);
}
if (this->c) {
futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->c);
}
futhark_context_free(this->ctx);
futhark_context_config_free(this->cfg);
}
template <>
void FutharkStream<float>::init_arrays(float initA, float initB, float initC) {
int array_size = this->array_size;
float *a = new float[array_size];
float *b = new float[array_size];
float *c = new float[array_size];
for (int i = 0; i < array_size; i++) {
a[i] = initA;
b[i] = initB;
c[i] = initC;
}
this->a = (futhark_f32_1d*)futhark_new_f32_1d(this->ctx, a, array_size);
this->b = (futhark_f32_1d*)futhark_new_f32_1d(this->ctx, b, array_size);
this->c = (futhark_f32_1d*)futhark_new_f32_1d(this->ctx, c, array_size);
futhark_context_sync(this->ctx);
delete[] a;
delete[] b;
delete[] c;
}
template <>
void FutharkStream<double>::init_arrays(double initA, double initB, double initC) {
int array_size = this->array_size;
double *a = new double[array_size];
double *b = new double[array_size];
double *c = new double[array_size];
for (int i = 0; i < array_size; i++) {
a[i] = initA;
b[i] = initB;
c[i] = initC;
}
this->a = (futhark_f64_1d*)futhark_new_f64_1d(this->ctx, a, array_size);
this->b = (futhark_f64_1d*)futhark_new_f64_1d(this->ctx, b, array_size);
this->c = (futhark_f64_1d*)futhark_new_f64_1d(this->ctx, c, array_size);
futhark_context_sync(this->ctx);
delete[] a;
delete[] b;
delete[] c;
}
template <>
void FutharkStream<float>::read_arrays(std::vector<float>& h_a, std::vector<float>& h_b, std::vector<float>& h_c) {
futhark_values_f32_1d(this->ctx, (futhark_f32_1d*)this->a, h_a.data());
futhark_values_f32_1d(this->ctx, (futhark_f32_1d*)this->b, h_b.data());
futhark_values_f32_1d(this->ctx, (futhark_f32_1d*)this->c, h_c.data());
futhark_context_sync(this->ctx);
}
template <>
void FutharkStream<double>::read_arrays(std::vector<double>& h_a, std::vector<double>& h_b, std::vector<double>& h_c) {
futhark_values_f64_1d(this->ctx, (futhark_f64_1d*)this->a, h_a.data());
futhark_values_f64_1d(this->ctx, (futhark_f64_1d*)this->b, h_b.data());
futhark_values_f64_1d(this->ctx, (futhark_f64_1d*)this->c, h_c.data());
futhark_context_sync(this->ctx);
}
template <>
void FutharkStream<float>::copy() {
futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->c);
futhark_entry_f32_copy(this->ctx, (futhark_f32_1d**)&this->c, (futhark_f32_1d*)this->a);
futhark_context_sync(this->ctx);
}
template <>
void FutharkStream<double>::copy() {
futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->c);
futhark_entry_f64_copy(this->ctx, (futhark_f64_1d**)&this->c, (futhark_f64_1d*)this->a);
futhark_context_sync(this->ctx);
}
template <>
void FutharkStream<float>::mul() {
futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->b);
futhark_entry_f32_mul(this->ctx, (futhark_f32_1d**)&this->b, (futhark_f32_1d*)this->c);
futhark_context_sync(this->ctx);
}
template <>
void FutharkStream<double>::mul() {
futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->b);
futhark_entry_f64_mul(this->ctx, (futhark_f64_1d**)&this->b, (futhark_f64_1d*)this->c);
futhark_context_sync(this->ctx);
}
template <>
void FutharkStream<float>::add() {
futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->c);
futhark_entry_f32_add(this->ctx, (futhark_f32_1d**)&this->c, (futhark_f32_1d*)this->a, (futhark_f32_1d*)this->b);
futhark_context_sync(this->ctx);
}
template <>
void FutharkStream<double>::add() {
futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->c);
futhark_entry_f64_add(this->ctx, (futhark_f64_1d**)&this->c, (futhark_f64_1d*)this->a, (futhark_f64_1d*)this->b);
futhark_context_sync(this->ctx);
}
template <>
void FutharkStream<float>::triad() {
futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->c);
futhark_entry_f32_triad(this->ctx, (futhark_f32_1d**)&this->c, (futhark_f32_1d*)this->a, (futhark_f32_1d*)this->b);
futhark_context_sync(this->ctx);
}
template <>
void FutharkStream<double>::triad() {
futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->a);
futhark_entry_f64_triad(this->ctx, (futhark_f64_1d**)&this->a, (futhark_f64_1d*)this->b, (futhark_f64_1d*)this->c);
futhark_context_sync(this->ctx);
}
template <>
void FutharkStream<float>::nstream() {
futhark_f32_1d* d;
futhark_entry_f32_triad(this->ctx, &d, (futhark_f32_1d*)this->a, (futhark_f32_1d*)this->b);
futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->c);
this->c = d;
futhark_context_sync(this->ctx);
}
template <>
void FutharkStream<double>::nstream() {
futhark_f64_1d* d;
futhark_entry_f64_triad(this->ctx, &d, (futhark_f64_1d*)this->a, (futhark_f64_1d*)this->b);
futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->c);
this->c = d;
futhark_context_sync(this->ctx);
}
template <>
float FutharkStream<float>::dot() {
float res;
futhark_entry_f32_dot(this->ctx, &res, (futhark_f32_1d*)this->a, (futhark_f32_1d*)this->b);
futhark_context_sync(this->ctx);
return res;
}
template <>
double FutharkStream<double>::dot() {
double res;
futhark_entry_f64_dot(this->ctx, &res, (futhark_f64_1d*)this->a, (futhark_f64_1d*)this->b);
futhark_context_sync(this->ctx);
return res;
}
void listDevices(void)
{
std::cout << "Device selection not supported." << std::endl;
}
template class FutharkStream<float>;
template class FutharkStream<double>;

View File

@ -0,0 +1,60 @@
// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith,
// University of Bristol HPC
// Copyright (c) 2022 Troels Henriksen
// University of Copenhagen
//
// For full license terms please see the LICENSE file distributed with this
// source code
#pragma once
#include <iostream>
#include <stdexcept>
#include "Stream.h"
#include "babelstream.h"
#if defined(FUTHARK_BACKEND_c)
#define IMPLEMENTATION_STRING "Futhark (sequential)"
#elif defined(FUTHARK_BACKEND_multicore)
#define IMPLEMENTATION_STRING "Futhark (parallel CPU)"
#elif defined(FUTHARK_BACKEND_opencl)
#define IMPLEMENTATION_STRING "Futhark (OpencL)"
#elif defined(FUTHARK_BACKEND_cuda)
#define IMPLEMENTATION_STRING "Futhark (CUDA)"
#else
#define IMPLEMENTATION_STRING "Futhark (unknown backend)"
#endif
template <class T>
class FutharkStream : public Stream<T>
{
protected:
// Size of arrays
int array_size;
// For device selection.
std::string device;
// Futhark stuff
struct futhark_context_config *cfg;
struct futhark_context *ctx;
// Device side arrays
void* a;
void* b;
void* c;
public:
FutharkStream(const int, int);
~FutharkStream();
virtual void copy() override;
virtual void add() override;
virtual void mul() override;
virtual void triad() override;
virtual void nstream() override;
virtual T dot() override;
virtual void init_arrays(T initA, T initB, T initC) override;
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
};

View File

@ -0,0 +1,62 @@
module type kernels = {
type t
val copy [n] : [n]t -> *[n]t
val mul [n] : t -> [n]t -> [n]t
val add [n] : [n]t -> [n]t -> [n]t
val triad [n] : t -> [n]t -> [n]t -> [n]t
val dot [n] : [n]t -> [n]t -> t
-- Uniqueness allows nstream to mutate the 'a' array.
val nstream [n] : t -> *[n]t -> [n]t -> [n]t -> [n]t
}
module kernels (P: real) : kernels with t = P.t = {
type t = P.t
def copy = copy
def mul scalar c = map (P.*scalar) c
def add = map2 (P.+)
def triad scalar b c = map2 (P.+) b (map (P.* scalar) c)
def dot a b = reduce (P.+) (P.i32 0) (map2 (P.*) a b)
def nstream scalar a b c = map2 (P.+) a (map2 (P.+) b (map (P.*scalar) c))
}
module f32_kernels = kernels f32
def f32_start_scalar : f32 = 0.4
entry f32_copy = f32_kernels.copy
entry f32_mul = f32_kernels.mul f32_start_scalar
entry f32_add = f32_kernels.add
entry f32_triad = f32_kernels.triad f32_start_scalar
entry f32_nstream = f32_kernels.nstream f32_start_scalar
entry f32_dot = f32_kernels.dot
module f64_kernels = kernels f64
def f64_start_scalar : f64 = 0.4
entry f64_copy = f64_kernels.copy
entry f64_mul = f64_kernels.mul f64_start_scalar
entry f64_add = f64_kernels.add
entry f64_triad = f64_kernels.triad f64_start_scalar
entry f64_nstream = f64_kernels.nstream f64_start_scalar
entry f64_dot = f64_kernels.dot
-- ==
-- entry: f32_copy f32_mul
-- random input { [33554432]f32 }
-- ==
-- entry: f32_add f32_dot f32_triad
-- random input { [33554432]f32 [33554432]f32 }
-- ==
-- entry: f32_nstream
-- random input { [33554432]f32 [33554432]f32 [33554432]f32 }
-- ==
-- entry: f64_copy f64_mul
-- random input { [33554432]f64 }
-- ==
-- entry: f64_add f64_dot f64_triad
-- random input { [33554432]f64 [33554432]f64 }
-- ==
-- entry: f64_nstream
-- random input { [33554432]f64 [33554432]f64 [33554432]f64 }

55
src/futhark/model.cmake Normal file
View File

@ -0,0 +1,55 @@
# Use
#
# cmake -Bbuild -H. -DMODEL=futhark -DFUTHARK_BACKEND=foo -DFUTHARK_COMPILER=foo/bar/bin/futhark
#
# to use the Futhark backend, where 'foo' must be one of 'multicore',
# 'c', 'opencl', or 'cuda'. Defaults to 'multicore'.
#
# Use -DFUTHARK_COMPILER to set the path to the Futhark compiler
# binary. Defaults to 'futhark' on the PATH.
register_flag_optional(FUTHARK_BACKEND
"Use a specific Futhark backend, possible options are:
- c
- multicore
- opencl
- cuda"
"multicore")
register_flag_optional(FUTHARK_COMPILER
"Absolute path to the Futhark compiler, defaults to the futhark compiler on PATH"
"futhark")
macro(setup)
add_custom_command(
OUTPUT
${CMAKE_CURRENT_BINARY_DIR}/babelstream.c
${CMAKE_CURRENT_BINARY_DIR}/babelstream.h
COMMAND ${FUTHARK_COMPILER} ${FUTHARK_BACKEND}
--library src/futhark/babelstream.fut
-o ${CMAKE_CURRENT_BINARY_DIR}/babelstream
DEPENDS src/futhark/babelstream.fut
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
VERBATIM
)
if (${FUTHARK_BACKEND} STREQUAL "c")
# Nothing to do.
elseif (${FUTHARK_BACKEND} STREQUAL "multicore")
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
register_link_library(Threads::Threads)
elseif (${FUTHARK_BACKEND} STREQUAL "opencl")
find_package(OpenCL REQUIRED)
register_link_library(OpenCL::OpenCL)
elseif (${FUTHARK_BACKEND} STREQUAL "cuda")
find_package(CUDA REQUIRED)
register_link_library("nvrtc" "cuda" "cudart")
else ()
message(FATAL_ERROR "Unsupported Futhark backend: ${FUTHARK_BACKEND}")
endif()
endmacro()
macro(setup_target)
target_sources(${EXE_NAME} PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/babelstream.c")
include_directories("${CMAKE_CURRENT_BINARY_DIR}")
endmacro()

View File

@ -49,6 +49,8 @@
#include "SYCLStream2020.h" #include "SYCLStream2020.h"
#elif defined(OMP) #elif defined(OMP)
#include "OMPStream.h" #include "OMPStream.h"
#elif defined(FUTHARK)
#include "FutharkStream.h"
#endif #endif
// Default size of 2^25 // Default size of 2^25
@ -298,6 +300,10 @@ void run()
// Use the OpenMP implementation // Use the OpenMP implementation
stream = new OMPStream<T>(ARRAY_SIZE, deviceIndex); stream = new OMPStream<T>(ARRAY_SIZE, deviceIndex);
#elif defined(FUTHARK)
// Use the Futhark implementation
stream = new FutharkStream<T>(ARRAY_SIZE, deviceIndex);
#endif #endif
stream->init_arrays(startA, startB, startC); stream->init_arrays(startA, startB, startC);