From ed6206b54398f785ce3d7f2dfe048a98fd3d7a21 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Thu, 28 Jul 2022 23:45:43 +0100 Subject: [PATCH] Remove conditional sync after each kernel Don't capture `this`, capture each member instead --- src/dpl_shim.h | 4 ---- src/std-data/STDDataStream.cpp | 15 +++++---------- src/std-indices/STDIndicesStream.cpp | 23 +++++++++-------------- src/std-ranges/STDRangesStream.cpp | 15 +++++---------- 4 files changed, 19 insertions(+), 38 deletions(-) diff --git a/src/dpl_shim.h b/src/dpl_shim.h index d341a59..e47ae99 100644 --- a/src/dpl_shim.h +++ b/src/dpl_shim.h @@ -34,8 +34,6 @@ T *alloc_raw(size_t size) { return sycl::malloc_shared(size, exe_policy.queue template void dealloc_raw(T *ptr) { sycl::free(ptr, exe_policy.queue()); } -static void sync_device(){exe_policy.queue().wait_and_throw(); } - #else // auto exe_policy = dpl::execution::seq; @@ -74,6 +72,4 @@ T *alloc_raw(size_t size) { return (T *) aligned_alloc(ALIGNMENT, sizeof(T) * si template void dealloc_raw(T *ptr) { free(ptr); } -static void sync_device(){ /*no-op*/ } - #endif diff --git a/src/std-data/STDDataStream.cpp b/src/std-data/STDDataStream.cpp index d4dc17f..7c71163 100644 --- a/src/std-data/STDDataStream.cpp +++ b/src/std-data/STDDataStream.cpp @@ -69,32 +69,28 @@ void STDDataStream::copy() { // c[i] = a[i] std::copy(exe_policy, BEGIN(a), END(a), BEGIN(c)); - sync_device(); -} + } template void STDDataStream::mul() { // b[i] = scalar * c[i]; std::transform(exe_policy, BEGIN(c), END(c), BEGIN(b), [scalar = startScalar](T ci){ return scalar*ci; }); - sync_device(); -} + } template void STDDataStream::add() { // c[i] = a[i] + b[i]; std::transform(exe_policy, BEGIN(a), END(a), BEGIN(b), BEGIN(c), std::plus()); - sync_device(); -} + } template void STDDataStream::triad() { // a[i] = b[i] + scalar * c[i]; std::transform(exe_policy, BEGIN(b), END(b), BEGIN(c), BEGIN(a), [scalar = startScalar](T bi, T ci){ return bi+scalar*ci; }); - sync_device(); -} + } template void STDDataStream::nstream() @@ -105,8 +101,7 @@ void STDDataStream::nstream() // 2: a[i] += scalar * c[i]; std::transform(exe_policy, BEGIN(a), END(a), BEGIN(b), BEGIN(a), [](T ai, T bi){ return ai + bi; }); std::transform(exe_policy, BEGIN(a), END(a), BEGIN(c), BEGIN(a), [scalar = startScalar](T ai, T ci){ return ai + scalar*ci; }); - sync_device(); -} + } template diff --git a/src/std-indices/STDIndicesStream.cpp b/src/std-indices/STDIndicesStream.cpp index 04b7829..f9397fa 100644 --- a/src/std-indices/STDIndicesStream.cpp +++ b/src/std-indices/STDIndicesStream.cpp @@ -77,38 +77,34 @@ void STDIndicesStream::copy() { // c[i] = a[i] std::copy(exe_policy, BEGIN(a), END(a), BEGIN(c)); - sync_device(); -} + } template void STDIndicesStream::mul() { // b[i] = scalar * c[i]; - std::transform(exe_policy, range.begin(), range.end(), BEGIN(b), [this, scalar = startScalar](int i) { + std::transform(exe_policy, range.begin(), range.end(), BEGIN(b), [c = this->c, scalar = startScalar](int i) { return scalar * c[i]; }); - sync_device(); -} + } template void STDIndicesStream::add() { // c[i] = a[i] + b[i]; - std::transform(exe_policy, range.begin(), range.end(), BEGIN(c), [this](int i) { + std::transform(exe_policy, range.begin(), range.end(), BEGIN(c), [a = this->a, b = this->b](int i) { return a[i] + b[i]; }); - sync_device(); -} + } template void STDIndicesStream::triad() { // a[i] = b[i] + scalar * c[i]; - std::transform(exe_policy, range.begin(), range.end(), BEGIN(a), [this, scalar = startScalar](int i) { + std::transform(exe_policy, range.begin(), range.end(), BEGIN(a), [b = this->b, c = this->c, scalar = startScalar](int i) { return b[i] + scalar * c[i]; }); - sync_device(); -} + } template void STDIndicesStream::nstream() @@ -117,11 +113,10 @@ void STDIndicesStream::nstream() // Need to do in two stages with C++11 STL. // 1: a[i] += b[i] // 2: a[i] += scalar * c[i]; - std::transform(exe_policy, range.begin(), range.end(), BEGIN(a), [this, scalar = startScalar](int i) { + std::transform(exe_policy, range.begin(), range.end(), BEGIN(a), [a = this->a, b = this->b, c = this->c, scalar = startScalar](int i) { return a[i] + b[i] + scalar * c[i]; }); - sync_device(); -} + } template diff --git a/src/std-ranges/STDRangesStream.cpp b/src/std-ranges/STDRangesStream.cpp index 8a77a68..9063ff2 100644 --- a/src/std-ranges/STDRangesStream.cpp +++ b/src/std-ranges/STDRangesStream.cpp @@ -89,8 +89,7 @@ void STDRangesStream::copy() c[i] = a[i]; } ); - sync_device(); -} + } template void STDRangesStream::mul() @@ -104,8 +103,7 @@ void STDRangesStream::mul() b[i] = scalar * c[i]; } ); - sync_device(); -} + } template void STDRangesStream::add() @@ -117,8 +115,7 @@ void STDRangesStream::add() c[i] = a[i] + b[i]; } ); - sync_device(); -} + } template void STDRangesStream::triad() @@ -132,8 +129,7 @@ void STDRangesStream::triad() a[i] = b[i] + scalar * c[i]; } ); - sync_device(); -} + } template void STDRangesStream::nstream() @@ -147,8 +143,7 @@ void STDRangesStream::nstream() a[i] += b[i] + scalar * c[i]; } ); - sync_device(); -} + } template T STDRangesStream::dot()