Add clarifying comment and further clean-up
This commit is contained in:
parent
f98aedf64d
commit
de93c06e78
@ -27,8 +27,6 @@ HIPStream<T>::HIPStream(const int ARRAY_SIZE, const int device_index)
|
|||||||
block_count(array_size / (TBSIZE * elements_per_lane))
|
block_count(array_size / (TBSIZE * elements_per_lane))
|
||||||
{
|
{
|
||||||
|
|
||||||
std::cerr << "Elements per lane: " << elements_per_lane << std::endl;
|
|
||||||
std::cerr << "Chunks per block: " << chunks_per_block << std::endl;
|
|
||||||
// The array size must be divisible by the total number of elements
|
// The array size must be divisible by the total number of elements
|
||||||
// moved per block for kernel launches
|
// moved per block for kernel launches
|
||||||
if (ARRAY_SIZE % (TBSIZE * elements_per_lane) != 0)
|
if (ARRAY_SIZE % (TBSIZE * elements_per_lane) != 0)
|
||||||
@ -39,7 +37,6 @@ HIPStream<T>::HIPStream(const int ARRAY_SIZE, const int device_index)
|
|||||||
<< ").";
|
<< ").";
|
||||||
throw std::runtime_error(ss.str());
|
throw std::runtime_error(ss.str());
|
||||||
}
|
}
|
||||||
std::cerr << "block count " << block_count << std::endl;
|
|
||||||
|
|
||||||
// Set device
|
// Set device
|
||||||
int count;
|
int count;
|
||||||
@ -56,7 +53,10 @@ HIPStream<T>::HIPStream(const int ARRAY_SIZE, const int device_index)
|
|||||||
|
|
||||||
array_size = ARRAY_SIZE;
|
array_size = ARRAY_SIZE;
|
||||||
|
|
||||||
// Allocate the host array for partial sums for dot kernels
|
// Allocate the host array for partial sums for dot kernels using hipHostMalloc.
|
||||||
|
// This creates an array on the host which is visible to the device. However, it requires
|
||||||
|
// synchronization (e.g. hipDeviceSynchronize) for the result to be available on the host
|
||||||
|
// after it has been passed through to a kernel.
|
||||||
hipHostMalloc(&sums, sizeof(T) * block_count, hipHostMallocNonCoherent);
|
hipHostMalloc(&sums, sizeof(T) * block_count, hipHostMallocNonCoherent);
|
||||||
check_error();
|
check_error();
|
||||||
|
|
||||||
|
|||||||
@ -55,7 +55,6 @@ class HIPStream : public Stream<T>
|
|||||||
T *d_a;
|
T *d_a;
|
||||||
T *d_b;
|
T *d_b;
|
||||||
T *d_c;
|
T *d_c;
|
||||||
T *d_sum;
|
|
||||||
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|||||||
@ -5,6 +5,5 @@ register_flag_required(CMAKE_CXX_COMPILER
|
|||||||
register_flag_optional(DWORDS_PER_LANE "Flag indicating the number of dwords to process per wavefront lane." 4)
|
register_flag_optional(DWORDS_PER_LANE "Flag indicating the number of dwords to process per wavefront lane." 4)
|
||||||
|
|
||||||
macro(setup)
|
macro(setup)
|
||||||
# Ensure we set the proper preprocessor directives
|
|
||||||
register_definitions(DWORDS_PER_LANE=${DWORDS_PER_LANE})
|
register_definitions(DWORDS_PER_LANE=${DWORDS_PER_LANE})
|
||||||
endmacro()
|
endmacro()
|
||||||
Loading…
Reference in New Issue
Block a user