Store the array size in the class so it can be used for kernel launches

This commit is contained in:
Tom Deakin 2016-04-26 16:09:51 +01:00
parent 319e11011c
commit 9c673317a7
2 changed files with 8 additions and 4 deletions

View File

@ -14,6 +14,8 @@ void check_error(void)
template <class T> template <class T>
CUDAStream<T>::CUDAStream(const unsigned int ARRAY_SIZE) CUDAStream<T>::CUDAStream(const unsigned int ARRAY_SIZE)
{ {
array_size = ARRAY_SIZE;
// Create device buffers // Create device buffers
cudaMalloc(&d_a, ARRAY_SIZE*sizeof(T)); cudaMalloc(&d_a, ARRAY_SIZE*sizeof(T));
check_error(); check_error();
@ -59,7 +61,7 @@ __global__ void copy_kernel(const T * a, T * c)
template <class T> template <class T>
void CUDAStream<T>::copy() void CUDAStream<T>::copy()
{ {
copy_kernel<<<1024, 1024>>>(d_a, d_c); copy_kernel<<<array_size/1024, 1024>>>(d_a, d_c);
check_error(); check_error();
cudaDeviceSynchronize(); cudaDeviceSynchronize();
check_error(); check_error();
@ -76,7 +78,7 @@ __global__ void mul_kernel(T * b, const T * c)
template <class T> template <class T>
void CUDAStream<T>::mul() void CUDAStream<T>::mul()
{ {
mul_kernel<<<1024, 1024>>>(d_b, d_c); mul_kernel<<<array_size/1024, 1024>>>(d_b, d_c);
check_error(); check_error();
cudaDeviceSynchronize(); cudaDeviceSynchronize();
check_error(); check_error();
@ -92,7 +94,7 @@ __global__ void add_kernel(const T * a, const T * b, T * c)
template <class T> template <class T>
void CUDAStream<T>::add() void CUDAStream<T>::add()
{ {
add_kernel<<<1024, 1024>>>(d_a, d_b, d_c); add_kernel<<<array_size/1024, 1024>>>(d_a, d_b, d_c);
check_error(); check_error();
cudaDeviceSynchronize(); cudaDeviceSynchronize();
check_error(); check_error();
@ -109,7 +111,7 @@ __global__ void triad_kernel(T * a, const T * b, const T * c)
template <class T> template <class T>
void CUDAStream<T>::triad() void CUDAStream<T>::triad()
{ {
triad_kernel<<<1024, 1024>>>(d_a, d_b, d_c); triad_kernel<<<array_size/1024, 1024>>>(d_a, d_b, d_c);
check_error(); check_error();
cudaDeviceSynchronize(); cudaDeviceSynchronize();
check_error(); check_error();

View File

@ -7,6 +7,8 @@ template <class T>
class CUDAStream : public Stream<T> class CUDAStream : public Stream<T>
{ {
private: private:
// Size of arrays
unsigned int array_size;
// Device side pointers to arrays // Device side pointers to arrays
T *d_a; T *d_a;
T *d_b; T *d_b;