From 1a60f130eba160091f5cd5915caeca9ed4e536f9 Mon Sep 17 00:00:00 2001 From: Matthew Martineau Date: Fri, 6 May 2016 13:17:04 +0100 Subject: [PATCH] Fixed memory management for GPU, now working with OpenMP and CUDA --- RAJAStream.cpp | 44 +++++++++++++++++++++++++++++++++++--------- RAJAStream.hpp | 12 ++++++------ 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/RAJAStream.cpp b/RAJAStream.cpp index 5b1c980..eb98d54 100644 --- a/RAJAStream.cpp +++ b/RAJAStream.cpp @@ -16,21 +16,36 @@ RAJAStream::RAJAStream(const unsigned int ARRAY_SIZE, const int device_index) { RangeSegment seg(0, ARRAY_SIZE); index_set.push_back(seg); + +#ifdef RAJA_TARGET_CPU d_a = new T[ARRAY_SIZE]; d_b = new T[ARRAY_SIZE]; d_c = new T[ARRAY_SIZE]; +#else + cudaMallocManaged((void**)&d_a, sizeof(T)*ARRAY_SIZE, cudaMemAttachGlobal); + cudaMallocManaged((void**)&d_b, sizeof(T)*ARRAY_SIZE, cudaMemAttachGlobal); + cudaMallocManaged((void**)&d_c, sizeof(T)*ARRAY_SIZE, cudaMemAttachGlobal); + cudaDeviceSynchronize(); +#endif } template RAJAStream::~RAJAStream() { +#ifdef RAJA_TARGET_CPU delete[] d_a; delete[] d_b; delete[] d_c; +#else + cudaFree(d_a); + cudaFree(d_b); + cudaFree(d_c); +#endif } template -void RAJAStream::write_arrays(const std::vector& a, const std::vector& b, const std::vector& c) +void RAJAStream::write_arrays( + const std::vector& a, const std::vector& b, const std::vector& c) { std::copy(a.begin(), a.end(), d_a); std::copy(b.begin(), b.end(), d_b); @@ -38,48 +53,59 @@ void RAJAStream::write_arrays(const std::vector& a, const std::vector& } template -void RAJAStream::read_arrays(std::vector& a, std::vector& b, std::vector& c) +void RAJAStream::read_arrays( + std::vector& a, std::vector& b, std::vector& c) { - std::copy(d_a, d_a + array_size - 1, a.data()); - std::copy(d_b, d_b + array_size - 1, b.data()); - std::copy(d_c, d_c + array_size - 1, c.data()); + std::copy(d_a, d_a + array_size, a.data()); + std::copy(d_b, d_b + array_size, b.data()); + std::copy(d_c, d_c + array_size, c.data()); } template void RAJAStream::copy() { + T* a = d_a; + T* c = d_c; forall(index_set, [=] RAJA_DEVICE (int index) { - d_c[index] = d_a[index]; + c[index] = a[index]; }); } template void RAJAStream::mul() { + T* b = d_b; + T* c = d_c; const T scalar = 3.0; forall(index_set, [=] RAJA_DEVICE (int index) { - d_b[index] = scalar*d_c[index]; + b[index] = scalar*c[index]; }); } template void RAJAStream::add() { + T* a = d_a; + T* b = d_b; + T* c = d_c; forall(index_set, [=] RAJA_DEVICE (int index) { - d_c[index] = d_a[index] + d_b[index]; + c[index] = a[index] + b[index]; }); } template void RAJAStream::triad() { + T* a = d_a; + T* b = d_b; + T* c = d_c; const T scalar = 3.0; forall(index_set, [=] RAJA_DEVICE (int index) { - d_a[index] = d_b[index] + scalar*d_c[index]; + a[index] = b[index] + scalar*c[index]; }); } diff --git a/RAJAStream.hpp b/RAJAStream.hpp index a41c60e..454e20e 100644 --- a/RAJAStream.hpp +++ b/RAJAStream.hpp @@ -14,15 +14,15 @@ #define IMPLEMENTATION_STRING "RAJA" -#ifdef RAJA_USE_CUDA -const size_t block_size = 128; -typedef RAJA::IndexSet::ExecPolicy< - RAJA::seq_segit, - RAJA::cuda_exec_async> policy; -#else +#ifdef RAJA_TARGET_CPU typedef RAJA::IndexSet::ExecPolicy< RAJA::seq_segit, RAJA::omp_parallel_for_exec> policy; +#else +const size_t block_size = 128; +typedef RAJA::IndexSet::ExecPolicy< + RAJA::seq_segit, + RAJA::cuda_exec> policy; #endif template