// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith, // University of Bristol HPC // // For full license terms please see the LICENSE file distributed with this // source code #include "RAJAStream.hpp" using RAJA::forall; using RAJA::RangeSegment; template RAJAStream::RAJAStream(const unsigned int ARRAY_SIZE, const int device_index) : array_size(ARRAY_SIZE) { RangeSegment seg(0, ARRAY_SIZE); index_set.push_back(seg); #ifdef RAJA_TARGET_CPU d_a = new T[ARRAY_SIZE]; d_b = new T[ARRAY_SIZE]; d_c = new T[ARRAY_SIZE]; forall(index_set, [=] RAJA_DEVICE (int index) { d_a[index] = 0.0; d_b[index] = 0.0; d_c[index] = 0.0; }); #else cudaMallocManaged((void**)&d_a, sizeof(T)*ARRAY_SIZE, cudaMemAttachGlobal); cudaMallocManaged((void**)&d_b, sizeof(T)*ARRAY_SIZE, cudaMemAttachGlobal); cudaMallocManaged((void**)&d_c, sizeof(T)*ARRAY_SIZE, cudaMemAttachGlobal); cudaDeviceSynchronize(); #endif } template RAJAStream::~RAJAStream() { #ifdef RAJA_TARGET_CPU delete[] d_a; delete[] d_b; delete[] d_c; #else cudaFree(d_a); cudaFree(d_b); cudaFree(d_c); #endif } template void RAJAStream::write_arrays( const std::vector& a, const std::vector& b, const std::vector& c) { std::copy(a.begin(), a.end(), d_a); std::copy(b.begin(), b.end(), d_b); std::copy(c.begin(), c.end(), d_c); } template void RAJAStream::read_arrays( std::vector& a, std::vector& b, std::vector& c) { std::copy(d_a, d_a + array_size, a.data()); std::copy(d_b, d_b + array_size, b.data()); std::copy(d_c, d_c + array_size, c.data()); } template void RAJAStream::copy() { T* a = d_a; T* c = d_c; forall(index_set, [=] RAJA_DEVICE (int index) { c[index] = a[index]; }); } template void RAJAStream::mul() { T* b = d_b; T* c = d_c; const T scalar = startScalar; forall(index_set, [=] RAJA_DEVICE (int index) { b[index] = scalar*c[index]; }); } template void RAJAStream::add() { T* a = d_a; T* b = d_b; T* c = d_c; forall(index_set, [=] RAJA_DEVICE (int index) { c[index] = a[index] + b[index]; }); } template void RAJAStream::triad() { T* a = d_a; T* b = d_b; T* c = d_c; const T scalar = startScalar; forall(index_set, [=] RAJA_DEVICE (int index) { a[index] = b[index] + scalar*c[index]; }); } template T RAJAStream::dot() { T* a = d_a; T* b = d_b; RAJA::ReduceSum sum(0.0); forall(index_set, [=] RAJA_DEVICE (int index) { sum += a[index] * b[index]; }); return T(sum); } void listDevices(void) { std::cout << "This is not the device you are looking for."; } std::string getDeviceName(const int device) { return "RAJA"; } std::string getDeviceDriver(const int device) { return "RAJA"; } template class RAJAStream; template class RAJAStream;