parent
2e3ebeecab
commit
92fed7082b
11
.github/workflows/main.yaml
vendored
11
.github/workflows/main.yaml
vendored
@ -226,3 +226,14 @@ jobs:
|
||||
- name: Test compile hipsycl @ CMake 3.24
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./src/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_24_BIN }}
|
||||
|
||||
test-futhark:
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Prepare Futhark compiler
|
||||
uses: diku-dk/install-futhark@HEAD
|
||||
with:
|
||||
version: 'latest'
|
||||
- run: cmake -Bbuild -H. -DMODEL=futhark -DFUTHARK_BACKEND=multicore
|
||||
- run: cmake --build build
|
||||
|
||||
@ -4,7 +4,7 @@ if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0")
|
||||
cmake_policy(SET CMP0135 NEW)
|
||||
endif ()
|
||||
|
||||
project(BabelStream VERSION 4.0 LANGUAGES CXX)
|
||||
project(BabelStream VERSION 4.0 LANGUAGES CXX C)
|
||||
|
||||
# uncomment for debugging build issues:
|
||||
#set(CMAKE_VERBOSE_MAKEFILE ON)
|
||||
@ -162,6 +162,7 @@ register_model(acc ACC ACCStream.cpp)
|
||||
register_model(raja USE_RAJA RAJAStream.cpp)
|
||||
register_model(tbb TBB TBBStream.cpp)
|
||||
register_model(thrust THRUST ThrustStream.cu) # Thrust uses cu, even for rocThrust
|
||||
register_model(futhark FUTHARK FutharkStream.cpp)
|
||||
|
||||
|
||||
set(USAGE ON CACHE BOOL "Whether to print all custom flags for the selected model")
|
||||
|
||||
@ -41,6 +41,7 @@ BabelStream is currently implemented in the following parallel programming model
|
||||
- SYCL and SYCL2020 (USM and accessors)
|
||||
- TBB
|
||||
- Thrust (via CUDA or HIP)
|
||||
- Futhark
|
||||
|
||||
This project also contains implementations in alternative languages with different build systems:
|
||||
* Julia - [JuliaStream.jl](./src/julia/JuliaStream.jl)
|
||||
@ -101,7 +102,7 @@ The source for each model's implementations are located in `./src/<model>`.
|
||||
|
||||
Currently available models are:
|
||||
```
|
||||
omp;ocl;std-data;std-indices;std-ranges;hip;cuda;kokkos;sycl;sycl2020;acc;raja;tbb;thrust
|
||||
omp;ocl;std-data;std-indices;std-ranges;hip;cuda;kokkos;sycl;sycl2020;acc;raja;tbb;thrust;futhark
|
||||
```
|
||||
|
||||
#### Overriding default flags
|
||||
|
||||
212
src/futhark/FutharkStream.cpp
Normal file
212
src/futhark/FutharkStream.cpp
Normal file
@ -0,0 +1,212 @@
|
||||
// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith,
|
||||
// University of Bristol HPC
|
||||
// Copyright (c) 2022 Troels Henriksen
|
||||
// University of Copenhagen
|
||||
//
|
||||
// For full license terms please see the LICENSE file distributed with this
|
||||
// source code
|
||||
|
||||
#include <cstdlib> // For aligned_alloc
#include <stdexcept>
#include <string>
#include <vector>

#include "FutharkStream.h"
|
||||
|
||||
template <class T>
|
||||
FutharkStream<T>::FutharkStream(const int ARRAY_SIZE, int device)
|
||||
{
|
||||
this->array_size = ARRAY_SIZE;
|
||||
this->cfg = futhark_context_config_new();
|
||||
this->device = "#" + std::to_string(device);
|
||||
#if defined(FUTHARK_BACKEND_cuda) || defined(FUTHARK_BACKEND_opencl)
|
||||
futhark_context_config_set_device(cfg, this->device.c_str());
|
||||
#endif
|
||||
this->ctx = futhark_context_new(cfg);
|
||||
this->a = NULL;
|
||||
this->b = NULL;
|
||||
this->c = NULL;
|
||||
}
|
||||
|
||||
template <>
|
||||
FutharkStream<float>::~FutharkStream()
|
||||
{
|
||||
if (this->a) {
|
||||
futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->a);
|
||||
}
|
||||
if (this->b) {
|
||||
futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->b);
|
||||
}
|
||||
if (this->c) {
|
||||
futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->c);
|
||||
}
|
||||
futhark_context_free(this->ctx);
|
||||
futhark_context_config_free(this->cfg);
|
||||
}
|
||||
|
||||
template <>
|
||||
FutharkStream<double>::~FutharkStream()
|
||||
{
|
||||
if (this->a) {
|
||||
futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->a);
|
||||
}
|
||||
if (this->b) {
|
||||
futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->b);
|
||||
}
|
||||
if (this->c) {
|
||||
futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->c);
|
||||
}
|
||||
futhark_context_free(this->ctx);
|
||||
futhark_context_config_free(this->cfg);
|
||||
}
|
||||
|
||||
template <>
|
||||
void FutharkStream<float>::init_arrays(float initA, float initB, float initC) {
|
||||
int array_size = this->array_size;
|
||||
float *a = new float[array_size];
|
||||
float *b = new float[array_size];
|
||||
float *c = new float[array_size];
|
||||
for (int i = 0; i < array_size; i++) {
|
||||
a[i] = initA;
|
||||
b[i] = initB;
|
||||
c[i] = initC;
|
||||
}
|
||||
this->a = (futhark_f32_1d*)futhark_new_f32_1d(this->ctx, a, array_size);
|
||||
this->b = (futhark_f32_1d*)futhark_new_f32_1d(this->ctx, b, array_size);
|
||||
this->c = (futhark_f32_1d*)futhark_new_f32_1d(this->ctx, c, array_size);
|
||||
futhark_context_sync(this->ctx);
|
||||
delete[] a;
|
||||
delete[] b;
|
||||
delete[] c;
|
||||
}
|
||||
|
||||
template <>
|
||||
void FutharkStream<double>::init_arrays(double initA, double initB, double initC) {
|
||||
int array_size = this->array_size;
|
||||
double *a = new double[array_size];
|
||||
double *b = new double[array_size];
|
||||
double *c = new double[array_size];
|
||||
for (int i = 0; i < array_size; i++) {
|
||||
a[i] = initA;
|
||||
b[i] = initB;
|
||||
c[i] = initC;
|
||||
}
|
||||
this->a = (futhark_f64_1d*)futhark_new_f64_1d(this->ctx, a, array_size);
|
||||
this->b = (futhark_f64_1d*)futhark_new_f64_1d(this->ctx, b, array_size);
|
||||
this->c = (futhark_f64_1d*)futhark_new_f64_1d(this->ctx, c, array_size);
|
||||
futhark_context_sync(this->ctx);
|
||||
delete[] a;
|
||||
delete[] b;
|
||||
delete[] c;
|
||||
}
|
||||
|
||||
template <>
|
||||
void FutharkStream<float>::read_arrays(std::vector<float>& h_a, std::vector<float>& h_b, std::vector<float>& h_c) {
|
||||
futhark_values_f32_1d(this->ctx, (futhark_f32_1d*)this->a, h_a.data());
|
||||
futhark_values_f32_1d(this->ctx, (futhark_f32_1d*)this->b, h_b.data());
|
||||
futhark_values_f32_1d(this->ctx, (futhark_f32_1d*)this->c, h_c.data());
|
||||
futhark_context_sync(this->ctx);
|
||||
}
|
||||
|
||||
template <>
|
||||
void FutharkStream<double>::read_arrays(std::vector<double>& h_a, std::vector<double>& h_b, std::vector<double>& h_c) {
|
||||
futhark_values_f64_1d(this->ctx, (futhark_f64_1d*)this->a, h_a.data());
|
||||
futhark_values_f64_1d(this->ctx, (futhark_f64_1d*)this->b, h_b.data());
|
||||
futhark_values_f64_1d(this->ctx, (futhark_f64_1d*)this->c, h_c.data());
|
||||
futhark_context_sync(this->ctx);
|
||||
}
|
||||
|
||||
template <>
|
||||
void FutharkStream<float>::copy() {
|
||||
futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->c);
|
||||
futhark_entry_f32_copy(this->ctx, (futhark_f32_1d**)&this->c, (futhark_f32_1d*)this->a);
|
||||
futhark_context_sync(this->ctx);
|
||||
}
|
||||
|
||||
template <>
|
||||
void FutharkStream<double>::copy() {
|
||||
futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->c);
|
||||
futhark_entry_f64_copy(this->ctx, (futhark_f64_1d**)&this->c, (futhark_f64_1d*)this->a);
|
||||
futhark_context_sync(this->ctx);
|
||||
}
|
||||
|
||||
template <>
|
||||
void FutharkStream<float>::mul() {
|
||||
futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->b);
|
||||
futhark_entry_f32_mul(this->ctx, (futhark_f32_1d**)&this->b, (futhark_f32_1d*)this->c);
|
||||
futhark_context_sync(this->ctx);
|
||||
}
|
||||
|
||||
template <>
|
||||
void FutharkStream<double>::mul() {
|
||||
futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->b);
|
||||
futhark_entry_f64_mul(this->ctx, (futhark_f64_1d**)&this->b, (futhark_f64_1d*)this->c);
|
||||
futhark_context_sync(this->ctx);
|
||||
}
|
||||
|
||||
template <>
|
||||
void FutharkStream<float>::add() {
|
||||
futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->c);
|
||||
futhark_entry_f32_add(this->ctx, (futhark_f32_1d**)&this->c, (futhark_f32_1d*)this->a, (futhark_f32_1d*)this->b);
|
||||
futhark_context_sync(this->ctx);
|
||||
}
|
||||
|
||||
template <>
|
||||
void FutharkStream<double>::add() {
|
||||
futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->c);
|
||||
futhark_entry_f64_add(this->ctx, (futhark_f64_1d**)&this->c, (futhark_f64_1d*)this->a, (futhark_f64_1d*)this->b);
|
||||
futhark_context_sync(this->ctx);
|
||||
}
|
||||
|
||||
template <>
|
||||
void FutharkStream<float>::triad() {
|
||||
futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->c);
|
||||
futhark_entry_f32_triad(this->ctx, (futhark_f32_1d**)&this->c, (futhark_f32_1d*)this->a, (futhark_f32_1d*)this->b);
|
||||
futhark_context_sync(this->ctx);
|
||||
}
|
||||
|
||||
template <>
|
||||
void FutharkStream<double>::triad() {
|
||||
futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->a);
|
||||
futhark_entry_f64_triad(this->ctx, (futhark_f64_1d**)&this->a, (futhark_f64_1d*)this->b, (futhark_f64_1d*)this->c);
|
||||
futhark_context_sync(this->ctx);
|
||||
}
|
||||
|
||||
template <>
|
||||
void FutharkStream<float>::nstream() {
|
||||
futhark_f32_1d* d;
|
||||
futhark_entry_f32_triad(this->ctx, &d, (futhark_f32_1d*)this->a, (futhark_f32_1d*)this->b);
|
||||
futhark_free_f32_1d(this->ctx, (futhark_f32_1d*)this->c);
|
||||
this->c = d;
|
||||
futhark_context_sync(this->ctx);
|
||||
}
|
||||
|
||||
template <>
|
||||
void FutharkStream<double>::nstream() {
|
||||
futhark_f64_1d* d;
|
||||
futhark_entry_f64_triad(this->ctx, &d, (futhark_f64_1d*)this->a, (futhark_f64_1d*)this->b);
|
||||
futhark_free_f64_1d(this->ctx, (futhark_f64_1d*)this->c);
|
||||
this->c = d;
|
||||
futhark_context_sync(this->ctx);
|
||||
}
|
||||
|
||||
template <>
|
||||
float FutharkStream<float>::dot() {
|
||||
float res;
|
||||
futhark_entry_f32_dot(this->ctx, &res, (futhark_f32_1d*)this->a, (futhark_f32_1d*)this->b);
|
||||
futhark_context_sync(this->ctx);
|
||||
return res;
|
||||
}
|
||||
|
||||
template <>
|
||||
double FutharkStream<double>::dot() {
|
||||
double res;
|
||||
futhark_entry_f64_dot(this->ctx, &res, (futhark_f64_1d*)this->a, (futhark_f64_1d*)this->b);
|
||||
futhark_context_sync(this->ctx);
|
||||
return res;
|
||||
}
|
||||
|
||||
// Prints a fixed notice to standard output saying that devices cannot be
// listed for this model, followed by a newline and a flush.
void listDevices(void)
{
  static const char notice[] = "Device selection not supported.";
  std::cout << notice << std::endl;
}
|
||||
|
||||
// Explicit instantiation of the single-precision stream, so the templated
// constructor defined in this file is emitted for float.
template class FutharkStream<float>;
|
||||
// Explicit instantiation of the double-precision stream, so the templated
// constructor defined in this file is emitted for double.
template class FutharkStream<double>;
|
||||
60
src/futhark/FutharkStream.h
Normal file
60
src/futhark/FutharkStream.h
Normal file
@ -0,0 +1,60 @@
|
||||
// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith,
|
||||
// University of Bristol HPC
|
||||
// Copyright (c) 2022 Troels Henriksen
|
||||
// University of Copenhagen
|
||||
//
|
||||
// For full license terms please see the LICENSE file distributed with this
|
||||
// source code
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "Stream.h"
|
||||
#include "babelstream.h"
|
||||
|
||||
// Human-readable name of this implementation, chosen from whichever
// FUTHARK_BACKEND_* macro is defined.
// NOTE(review): these macros are presumably provided by the generated
// "babelstream.h" included above - confirm against the Futhark C API docs.
#if defined(FUTHARK_BACKEND_c)
#define IMPLEMENTATION_STRING "Futhark (sequential)"
#elif defined(FUTHARK_BACKEND_multicore)
#define IMPLEMENTATION_STRING "Futhark (parallel CPU)"
#elif defined(FUTHARK_BACKEND_opencl)
// Fixed capitalisation: this string previously read "OpencL".
#define IMPLEMENTATION_STRING "Futhark (OpenCL)"
#elif defined(FUTHARK_BACKEND_cuda)
#define IMPLEMENTATION_STRING "Futhark (CUDA)"
#else
#define IMPLEMENTATION_STRING "Futhark (unknown backend)"
#endif
|
||||
|
||||
template <class T>
|
||||
class FutharkStream : public Stream<T>
|
||||
{
|
||||
protected:
|
||||
// Size of arrays
|
||||
int array_size;
|
||||
// For device selection.
|
||||
std::string device;
|
||||
|
||||
// Futhark stuff
|
||||
struct futhark_context_config *cfg;
|
||||
struct futhark_context *ctx;
|
||||
|
||||
// Device side arrays
|
||||
void* a;
|
||||
void* b;
|
||||
void* c;
|
||||
|
||||
public:
|
||||
FutharkStream(const int, int);
|
||||
~FutharkStream();
|
||||
|
||||
virtual void copy() override;
|
||||
virtual void add() override;
|
||||
virtual void mul() override;
|
||||
virtual void triad() override;
|
||||
virtual void nstream() override;
|
||||
virtual T dot() override;
|
||||
|
||||
virtual void init_arrays(T initA, T initB, T initC) override;
|
||||
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
||||
};
|
||||
62
src/futhark/babelstream.fut
Normal file
62
src/futhark/babelstream.fut
Normal file
@ -0,0 +1,62 @@
|
||||
-- Signature of the benchmark kernels over an abstract element type 't'.
-- 'mul', 'triad' and 'nstream' take the scalar multiplier as their first
-- argument; every array argument and result shares the same size 'n'.
-- 'copy' returns a unique (freshly owned) array.
module type kernels = {
|
||||
type t
|
||||
val copy [n] : [n]t -> *[n]t
|
||||
val mul [n] : t -> [n]t -> [n]t
|
||||
val add [n] : [n]t -> [n]t -> [n]t
|
||||
val triad [n] : t -> [n]t -> [n]t -> [n]t
|
||||
val dot [n] : [n]t -> [n]t -> t
|
||||
-- Uniqueness allows nstream to mutate the 'a' array.
|
||||
val nstream [n] : t -> *[n]t -> [n]t -> [n]t -> [n]t
|
||||
}
|
||||
|
||||
-- Kernel implementations, parametric over any 'real' numeric module P.
module kernels (P: real) : kernels with t = P.t = {
  type t = P.t

  -- Duplicate the input array using the built-in copy.
  def copy = copy

  -- Scale every element by the scalar.
  def mul scalar xs = map (\x -> x P.* scalar) xs

  -- Element-wise sum of two arrays.
  def add xs ys = map2 (P.+) xs ys

  -- Element-wise b + c * scalar.
  def triad scalar b c = map2 (\bi ci -> bi P.+ (ci P.* scalar)) b c

  -- Sum of the element-wise products, starting from zero.
  def dot a b = reduce (P.+) (P.i32 0) (map2 (P.*) a b)

  -- Element-wise a + (b + c * scalar).
  def nstream scalar a b c =
    map3 (\ai bi ci -> ai P.+ (bi P.+ (ci P.* scalar))) a b c
}
|
||||
|
||||
-- Single-precision instantiation of the kernels and its entry points.
-- The scalar-taking kernels (mul, triad, nstream) are partially applied to
-- f32_start_scalar, so the entry points themselves take only arrays.
module f32_kernels = kernels f32
|
||||
def f32_start_scalar : f32 = 0.4
|
||||
entry f32_copy = f32_kernels.copy
|
||||
entry f32_mul = f32_kernels.mul f32_start_scalar
|
||||
entry f32_add = f32_kernels.add
|
||||
entry f32_triad = f32_kernels.triad f32_start_scalar
|
||||
entry f32_nstream = f32_kernels.nstream f32_start_scalar
|
||||
entry f32_dot = f32_kernels.dot
|
||||
|
||||
-- Double-precision instantiation of the kernels and its entry points.
-- The scalar-taking kernels (mul, triad, nstream) are partially applied to
-- f64_start_scalar, so the entry points themselves take only arrays.
module f64_kernels = kernels f64
|
||||
def f64_start_scalar : f64 = 0.4
|
||||
entry f64_copy = f64_kernels.copy
|
||||
entry f64_mul = f64_kernels.mul f64_start_scalar
|
||||
entry f64_add = f64_kernels.add
|
||||
entry f64_triad = f64_kernels.triad f64_start_scalar
|
||||
entry f64_nstream = f64_kernels.nstream f64_start_scalar
|
||||
entry f64_dot = f64_kernels.dot
|
||||
|
||||
-- ==
|
||||
-- entry: f32_copy f32_mul
|
||||
-- random input { [33554432]f32 }
|
||||
|
||||
-- ==
|
||||
-- entry: f32_add f32_dot f32_triad
|
||||
-- random input { [33554432]f32 [33554432]f32 }
|
||||
|
||||
-- ==
|
||||
-- entry: f32_nstream
|
||||
-- random input { [33554432]f32 [33554432]f32 [33554432]f32 }
|
||||
|
||||
-- ==
|
||||
-- entry: f64_copy f64_mul
|
||||
-- random input { [33554432]f64 }
|
||||
|
||||
-- ==
|
||||
-- entry: f64_add f64_dot f64_triad
|
||||
-- random input { [33554432]f64 [33554432]f64 }
|
||||
|
||||
-- ==
|
||||
-- entry: f64_nstream
|
||||
-- random input { [33554432]f64 [33554432]f64 [33554432]f64 }
|
||||
55
src/futhark/model.cmake
Normal file
55
src/futhark/model.cmake
Normal file
@ -0,0 +1,55 @@
|
||||
# Use
|
||||
#
|
||||
# cmake -Bbuild -H. -DMODEL=futhark -DFUTHARK_BACKEND=foo -DFUTHARK_COMPILER=foo/bar/bin/futhark
|
||||
#
|
||||
# to use the Futhark backend, where 'foo' must be one of 'multicore',
|
||||
# 'c', 'opencl', or 'cuda'. Defaults to 'multicore'.
|
||||
#
|
||||
# Use -DFUTHARK_COMPILER to set the path to the Futhark compiler
|
||||
# binary. Defaults to 'futhark' on the PATH.
|
||||
|
||||
# Futhark backend passed to the compiler as its sub-command; the default
# value is "multicore".
register_flag_optional(FUTHARK_BACKEND
|
||||
"Use a specific Futhark backend, possible options are:
|
||||
- c
|
||||
- multicore
|
||||
- opencl
|
||||
- cuda"
|
||||
"multicore")
|
||||
|
||||
# Futhark compiler executable to invoke; the default value is "futhark".
register_flag_optional(FUTHARK_COMPILER
|
||||
"Absolute path to the Futhark compiler, defaults to the futhark compiler on PATH"
|
||||
"futhark")
|
||||
|
||||
# Declares how to generate the Futhark C library and registers the link
# libraries the selected backend needs.
#
# The custom command runs `${FUTHARK_COMPILER} ${FUTHARK_BACKEND} --library`
# on src/futhark/babelstream.fut from the source directory and writes
# babelstream.c / babelstream.h into the current binary directory.
macro(setup)
    add_custom_command(
            OUTPUT
            ${CMAKE_CURRENT_BINARY_DIR}/babelstream.c
            ${CMAKE_CURRENT_BINARY_DIR}/babelstream.h
            COMMAND ${FUTHARK_COMPILER} ${FUTHARK_BACKEND}
            --library src/futhark/babelstream.fut
            -o ${CMAKE_CURRENT_BINARY_DIR}/babelstream
            DEPENDS src/futhark/babelstream.fut
            WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
            VERBATIM
    )

    # Compare by variable name rather than `${FUTHARK_BACKEND}`: the unquoted
    # expansion is evaluated a second time by if(), so a value such as "c"
    # would be dereferenced if a variable of that name existed, and an empty
    # value would make the condition a syntax error.
    if (FUTHARK_BACKEND STREQUAL "c")
        # Nothing to do.
    elseif (FUTHARK_BACKEND STREQUAL "multicore")
        set(THREADS_PREFER_PTHREAD_FLAG ON)
        find_package(Threads REQUIRED)
        register_link_library(Threads::Threads)
    elseif (FUTHARK_BACKEND STREQUAL "opencl")
        find_package(OpenCL REQUIRED)
        register_link_library(OpenCL::OpenCL)
    elseif (FUTHARK_BACKEND STREQUAL "cuda")
        find_package(CUDA REQUIRED)
        register_link_library("nvrtc" "cuda" "cudart")
    else ()
        message(FATAL_ERROR "Unsupported Futhark backend: ${FUTHARK_BACKEND}")
    endif ()
endmacro()
|
||||
|
||||
# Attaches the generated Futhark C source to the executable and makes the
# directory holding the generated header visible to #include.
macro(setup_target)
    include_directories("${CMAKE_CURRENT_BINARY_DIR}")
    target_sources(${EXE_NAME}
            PUBLIC
            "${CMAKE_CURRENT_BINARY_DIR}/babelstream.c")
endmacro()
|
||||
@ -49,6 +49,8 @@
|
||||
#include "SYCLStream2020.h"
|
||||
#elif defined(OMP)
|
||||
#include "OMPStream.h"
|
||||
#elif defined(FUTHARK)
|
||||
#include "FutharkStream.h"
|
||||
#endif
|
||||
|
||||
// Default size of 2^25
|
||||
@ -298,6 +300,10 @@ void run()
|
||||
// Use the OpenMP implementation
|
||||
stream = new OMPStream<T>(ARRAY_SIZE, deviceIndex);
|
||||
|
||||
#elif defined(FUTHARK)
|
||||
// Use the Futhark implementation
|
||||
stream = new FutharkStream<T>(ARRAY_SIZE, deviceIndex);
|
||||
|
||||
#endif
|
||||
|
||||
stream->init_arrays(startA, startB, startC);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user