diff --git a/CUDAStream.cu b/CUDAStream.cu
index 7b1e0df..260c07e 100644
--- a/CUDAStream.cu
+++ b/CUDAStream.cu
@@ -55,6 +55,25 @@ CUDAStream<T>::CUDAStream(const unsigned int ARRAY_SIZE, const int device_index)
     throw std::runtime_error("Device does not have enough memory for all 3 buffers");
 
   // Create device buffers
+#if defined(MANAGED)
+  // Unified memory: the destructor releases these with cudaFree()
+  cudaMallocManaged(&d_a, ARRAY_SIZE*sizeof(T));
+  check_error();
+  cudaMallocManaged(&d_b, ARRAY_SIZE*sizeof(T));
+  check_error();
+  cudaMallocManaged(&d_c, ARRAY_SIZE*sizeof(T));
+  check_error();
+  cudaMallocManaged(&d_sum, DOT_NUM_BLOCKS*sizeof(T));
+  check_error();
+#elif defined(PAGEFAULT)
+  // Plain host allocations: the destructor releases these with free()
+  d_a = (T*)malloc(sizeof(T)*ARRAY_SIZE);
+  d_b = (T*)malloc(sizeof(T)*ARRAY_SIZE);
+  d_c = (T*)malloc(sizeof(T)*ARRAY_SIZE);
+  d_sum = (T*)malloc(sizeof(T)*DOT_NUM_BLOCKS);
+  if (!d_a || !d_b || !d_c || !d_sum)
+    throw std::runtime_error("Host allocation failed for one or more buffers");
+#else
   cudaMalloc(&d_a, ARRAY_SIZE*sizeof(T));
   check_error();
   cudaMalloc(&d_b, ARRAY_SIZE*sizeof(T));
@@ -63,6 +82,7 @@ CUDAStream<T>::CUDAStream(const unsigned int ARRAY_SIZE, const int device_index)
   check_error();
   cudaMalloc(&d_sum, DOT_NUM_BLOCKS*sizeof(T));
   check_error();
+#endif
 }
 
 
@@ -71,6 +91,13 @@ CUDAStream<T>::~CUDAStream()
 {
   free(sums);
 
+  // free() only for malloc'd PAGEFAULT buffers; MANAGED takes precedence as in the constructor
+#if defined(PAGEFAULT) && !defined(MANAGED)
+  free(d_a);
+  free(d_b);
+  free(d_c);
+  free(d_sum);
+#else
   cudaFree(d_a);
   check_error();
   cudaFree(d_b);
@@ -79,6 +106,7 @@ CUDAStream<T>::~CUDAStream()
   check_error();
   cudaFree(d_sum);
   check_error();
+#endif
 }
 
 
@@ -104,12 +132,24 @@ template <class T>
 void CUDAStream<T>::read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c)
 {
   // Copy device memory to host
+#if defined(PAGEFAULT) || defined(MANAGED)
+  // Buffers are dereferenced directly on the host, so wait for outstanding device work first
+  cudaDeviceSynchronize();
+  check_error();
+  for (unsigned int i = 0; i < array_size; i++)
+  {
+    a[i] = d_a[i];
+    b[i] = d_b[i];
+    c[i] = d_c[i];
+  }
+#else
   cudaMemcpy(a.data(), d_a, a.size()*sizeof(T), cudaMemcpyDeviceToHost);
   check_error();
   cudaMemcpy(b.data(), d_b, b.size()*sizeof(T), cudaMemcpyDeviceToHost);
   check_error();
   cudaMemcpy(c.data(), d_c, c.size()*sizeof(T), cudaMemcpyDeviceToHost);
   check_error();
+#endif
 }
 
 
diff --git a/CUDAStream.h 
b/CUDAStream.h
index 0a0236b..2953a5f 100644
--- a/CUDAStream.h
+++ b/CUDAStream.h
@@ -13,7 +13,14 @@
 
 #include "Stream.h"
 
-#define IMPLEMENTATION_STRING "CUDA"
+// Variant label; MANAGED is tested first, as in the CUDAStream.cu constructor
+#if defined(MANAGED)
+  #define IMPLEMENTATION_STRING "CUDA - Managed Memory"
+#elif defined(PAGEFAULT)
+  #define IMPLEMENTATION_STRING "CUDA - Page Fault"
+#else
+  #define IMPLEMENTATION_STRING "CUDA"
+#endif
 
 #define TBSIZE 1024
 #define DOT_NUM_BLOCKS 256