Add clarifying comment and further clean-up
This commit is contained in:
parent
f98aedf64d
commit
de93c06e78
@ -27,8 +27,6 @@ HIPStream<T>::HIPStream(const int ARRAY_SIZE, const int device_index)
|
||||
block_count(array_size / (TBSIZE * elements_per_lane))
|
||||
{
|
||||
|
||||
std::cerr << "Elements per lane: " << elements_per_lane << std::endl;
|
||||
std::cerr << "Chunks per block: " << chunks_per_block << std::endl;
|
||||
// The array size must be divisible by the total number of elements
|
||||
// moved per block for kernel launches
|
||||
if (ARRAY_SIZE % (TBSIZE * elements_per_lane) != 0)
|
||||
@ -39,7 +37,6 @@ HIPStream<T>::HIPStream(const int ARRAY_SIZE, const int device_index)
|
||||
<< ").";
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
std::cerr << "block count " << block_count << std::endl;
|
||||
|
||||
// Set device
|
||||
int count;
|
||||
@ -56,7 +53,10 @@ HIPStream<T>::HIPStream(const int ARRAY_SIZE, const int device_index)
|
||||
|
||||
array_size = ARRAY_SIZE;
|
||||
|
||||
// Allocate the host array for partial sums for dot kernels
|
||||
// Allocate the host array for partial sums for dot kernels using hipHostMalloc.
|
||||
// This creates an array on the host which is visible to the device. However, it requires
|
||||
// synchronization (e.g. hipDeviceSynchronize) for the result to be available on the host
|
||||
// after it has been passed through to a kernel.
|
||||
hipHostMalloc(&sums, sizeof(T) * block_count, hipHostMallocNonCoherent);
|
||||
check_error();
|
||||
|
||||
|
||||
@ -55,7 +55,6 @@ class HIPStream : public Stream<T>
|
||||
T *d_a;
|
||||
T *d_b;
|
||||
T *d_c;
|
||||
T *d_sum;
|
||||
|
||||
|
||||
public:
|
||||
|
||||
@ -5,6 +5,5 @@ register_flag_required(CMAKE_CXX_COMPILER
|
||||
register_flag_optional(DWORDS_PER_LANE "Flag indicating the number of dwords to process per wavefront lane." 4)
|
||||
|
||||
macro(setup)
|
||||
# Ensure DWORDS_PER_LANE is set as a preprocessor definition
|
||||
register_definitions(DWORDS_PER_LANE=${DWORDS_PER_LANE})
|
||||
endmacro()
|
||||
Loading…
Reference in New Issue
Block a user