Remove conditional sync after each kernel
Don't capture `this`, capture each member instead
This commit is contained in:
parent
aa82e57ba0
commit
ed6206b543
@ -34,8 +34,6 @@ T *alloc_raw(size_t size) { return sycl::malloc_shared<T>(size, exe_policy.queue
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
void dealloc_raw(T *ptr) { sycl::free(ptr, exe_policy.queue()); }
|
void dealloc_raw(T *ptr) { sycl::free(ptr, exe_policy.queue()); }
|
||||||
|
|
||||||
static void sync_device(){exe_policy.queue().wait_and_throw(); }
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
// auto exe_policy = dpl::execution::seq;
|
// auto exe_policy = dpl::execution::seq;
|
||||||
@ -74,6 +72,4 @@ T *alloc_raw(size_t size) { return (T *) aligned_alloc(ALIGNMENT, sizeof(T) * si
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
void dealloc_raw(T *ptr) { free(ptr); }
|
void dealloc_raw(T *ptr) { free(ptr); }
|
||||||
|
|
||||||
static void sync_device(){ /*no-op*/ }
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -69,7 +69,6 @@ void STDDataStream<T>::copy()
|
|||||||
{
|
{
|
||||||
// c[i] = a[i]
|
// c[i] = a[i]
|
||||||
std::copy(exe_policy, BEGIN(a), END(a), BEGIN(c));
|
std::copy(exe_policy, BEGIN(a), END(a), BEGIN(c));
|
||||||
sync_device();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
@ -77,7 +76,6 @@ void STDDataStream<T>::mul()
|
|||||||
{
|
{
|
||||||
// b[i] = scalar * c[i];
|
// b[i] = scalar * c[i];
|
||||||
std::transform(exe_policy, BEGIN(c), END(c), BEGIN(b), [scalar = startScalar](T ci){ return scalar*ci; });
|
std::transform(exe_policy, BEGIN(c), END(c), BEGIN(b), [scalar = startScalar](T ci){ return scalar*ci; });
|
||||||
sync_device();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
@ -85,7 +83,6 @@ void STDDataStream<T>::add()
|
|||||||
{
|
{
|
||||||
// c[i] = a[i] + b[i];
|
// c[i] = a[i] + b[i];
|
||||||
std::transform(exe_policy, BEGIN(a), END(a), BEGIN(b), BEGIN(c), std::plus<T>());
|
std::transform(exe_policy, BEGIN(a), END(a), BEGIN(b), BEGIN(c), std::plus<T>());
|
||||||
sync_device();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
@ -93,7 +90,6 @@ void STDDataStream<T>::triad()
|
|||||||
{
|
{
|
||||||
// a[i] = b[i] + scalar * c[i];
|
// a[i] = b[i] + scalar * c[i];
|
||||||
std::transform(exe_policy, BEGIN(b), END(b), BEGIN(c), BEGIN(a), [scalar = startScalar](T bi, T ci){ return bi+scalar*ci; });
|
std::transform(exe_policy, BEGIN(b), END(b), BEGIN(c), BEGIN(a), [scalar = startScalar](T bi, T ci){ return bi+scalar*ci; });
|
||||||
sync_device();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
@ -105,7 +101,6 @@ void STDDataStream<T>::nstream()
|
|||||||
// 2: a[i] += scalar * c[i];
|
// 2: a[i] += scalar * c[i];
|
||||||
std::transform(exe_policy, BEGIN(a), END(a), BEGIN(b), BEGIN(a), [](T ai, T bi){ return ai + bi; });
|
std::transform(exe_policy, BEGIN(a), END(a), BEGIN(b), BEGIN(a), [](T ai, T bi){ return ai + bi; });
|
||||||
std::transform(exe_policy, BEGIN(a), END(a), BEGIN(c), BEGIN(a), [scalar = startScalar](T ai, T ci){ return ai + scalar*ci; });
|
std::transform(exe_policy, BEGIN(a), END(a), BEGIN(c), BEGIN(a), [scalar = startScalar](T ai, T ci){ return ai + scalar*ci; });
|
||||||
sync_device();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -77,37 +77,33 @@ void STDIndicesStream<T>::copy()
|
|||||||
{
|
{
|
||||||
// c[i] = a[i]
|
// c[i] = a[i]
|
||||||
std::copy(exe_policy, BEGIN(a), END(a), BEGIN(c));
|
std::copy(exe_policy, BEGIN(a), END(a), BEGIN(c));
|
||||||
sync_device();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void STDIndicesStream<T>::mul()
|
void STDIndicesStream<T>::mul()
|
||||||
{
|
{
|
||||||
// b[i] = scalar * c[i];
|
// b[i] = scalar * c[i];
|
||||||
std::transform(exe_policy, range.begin(), range.end(), BEGIN(b), [this, scalar = startScalar](int i) {
|
std::transform(exe_policy, range.begin(), range.end(), BEGIN(b), [c = this->c, scalar = startScalar](int i) {
|
||||||
return scalar * c[i];
|
return scalar * c[i];
|
||||||
});
|
});
|
||||||
sync_device();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void STDIndicesStream<T>::add()
|
void STDIndicesStream<T>::add()
|
||||||
{
|
{
|
||||||
// c[i] = a[i] + b[i];
|
// c[i] = a[i] + b[i];
|
||||||
std::transform(exe_policy, range.begin(), range.end(), BEGIN(c), [this](int i) {
|
std::transform(exe_policy, range.begin(), range.end(), BEGIN(c), [a = this->a, b = this->b](int i) {
|
||||||
return a[i] + b[i];
|
return a[i] + b[i];
|
||||||
});
|
});
|
||||||
sync_device();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
void STDIndicesStream<T>::triad()
|
void STDIndicesStream<T>::triad()
|
||||||
{
|
{
|
||||||
// a[i] = b[i] + scalar * c[i];
|
// a[i] = b[i] + scalar * c[i];
|
||||||
std::transform(exe_policy, range.begin(), range.end(), BEGIN(a), [this, scalar = startScalar](int i) {
|
std::transform(exe_policy, range.begin(), range.end(), BEGIN(a), [b = this->b, c = this->c, scalar = startScalar](int i) {
|
||||||
return b[i] + scalar * c[i];
|
return b[i] + scalar * c[i];
|
||||||
});
|
});
|
||||||
sync_device();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
@ -117,10 +113,9 @@ void STDIndicesStream<T>::nstream()
|
|||||||
// Need to do in two stages with C++11 STL.
|
// Need to do in two stages with C++11 STL.
|
||||||
// 1: a[i] += b[i]
|
// 1: a[i] += b[i]
|
||||||
// 2: a[i] += scalar * c[i];
|
// 2: a[i] += scalar * c[i];
|
||||||
std::transform(exe_policy, range.begin(), range.end(), BEGIN(a), [this, scalar = startScalar](int i) {
|
std::transform(exe_policy, range.begin(), range.end(), BEGIN(a), [a = this->a, b = this->b, c = this->c, scalar = startScalar](int i) {
|
||||||
return a[i] + b[i] + scalar * c[i];
|
return a[i] + b[i] + scalar * c[i];
|
||||||
});
|
});
|
||||||
sync_device();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -89,7 +89,6 @@ void STDRangesStream<T>::copy()
|
|||||||
c[i] = a[i];
|
c[i] = a[i];
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
sync_device();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
@ -104,7 +103,6 @@ void STDRangesStream<T>::mul()
|
|||||||
b[i] = scalar * c[i];
|
b[i] = scalar * c[i];
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
sync_device();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
@ -117,7 +115,6 @@ void STDRangesStream<T>::add()
|
|||||||
c[i] = a[i] + b[i];
|
c[i] = a[i] + b[i];
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
sync_device();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
@ -132,7 +129,6 @@ void STDRangesStream<T>::triad()
|
|||||||
a[i] = b[i] + scalar * c[i];
|
a[i] = b[i] + scalar * c[i];
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
sync_device();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
@ -147,7 +143,6 @@ void STDRangesStream<T>::nstream()
|
|||||||
a[i] += b[i] + scalar * c[i];
|
a[i] += b[i] + scalar * c[i];
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
sync_device();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user