[CUDA] Add Managed memory and Page fault options

To use managed memory, compile the code defining MANAGED
To use CUDA 8 page-fault memory, compile the code defining PAGEFAULT
This commit is contained in:
Tom Deakin 2016-12-19 05:00:15 -07:00
parent b9c514fd9b
commit 62860284b2
2 changed files with 40 additions and 1 deletions

View File

@@ -55,6 +55,21 @@ CUDAStream<T>::CUDAStream(const unsigned int ARRAY_SIZE, const int device_index)
throw std::runtime_error("Device does not have enough memory for all 3 buffers");
// Create device buffers
#if defined(MANAGED)
cudaMallocManaged(&d_a, ARRAY_SIZE*sizeof(T));
check_error();
cudaMallocManaged(&d_b, ARRAY_SIZE*sizeof(T));
check_error();
cudaMallocManaged(&d_c, ARRAY_SIZE*sizeof(T));
check_error();
cudaMallocManaged(&d_sum, DOT_NUM_BLOCKS*sizeof(T));
check_error();
#elif defined(PAGEFAULT)
d_a = (T*)malloc(sizeof(T)*ARRAY_SIZE);
d_b = (T*)malloc(sizeof(T)*ARRAY_SIZE);
d_c = (T*)malloc(sizeof(T)*ARRAY_SIZE);
d_sum = (T*)malloc(sizeof(T)*DOT_NUM_BLOCKS);
#else
cudaMalloc(&d_a, ARRAY_SIZE*sizeof(T));
check_error();
cudaMalloc(&d_b, ARRAY_SIZE*sizeof(T));
@@ -63,6 +78,7 @@ CUDAStream<T>::CUDAStream(const unsigned int ARRAY_SIZE, const int device_index)
check_error();
cudaMalloc(&d_sum, DOT_NUM_BLOCKS*sizeof(T));
check_error();
#endif
}
@@ -71,6 +87,12 @@ CUDAStream<T>::~CUDAStream()
{
free(sums);
#if defined(PAGEFAULT)
free(d_a);
free(d_b);
free(d_c);
free(d_sum);
#else
cudaFree(d_a);
check_error();
cudaFree(d_b);
@@ -79,6 +101,7 @@ CUDAStream<T>::~CUDAStream()
check_error();
cudaFree(d_sum);
check_error();
#endif
}
@@ -104,12 +127,22 @@ template <class T>
void CUDAStream<T>::read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c)
{
// Copy device memory to host
#if defined(PAGEFAULT) || defined(MANAGED)
cudaDeviceSynchronize();
for (int i = 0; i < array_size; i++)
{
a[i] = d_a[i];
b[i] = d_b[i];
c[i] = d_c[i];
}
#else
cudaMemcpy(a.data(), d_a, a.size()*sizeof(T), cudaMemcpyDeviceToHost);
check_error();
cudaMemcpy(b.data(), d_b, b.size()*sizeof(T), cudaMemcpyDeviceToHost);
check_error();
cudaMemcpy(c.data(), d_c, c.size()*sizeof(T), cudaMemcpyDeviceToHost);
check_error();
#endif
}

View File

@@ -13,7 +13,13 @@
#include "Stream.h"
#if defined(PAGEFAULT)
#define IMPLEMENTATION_STRING "CUDA - Page Fault"
#elif defined(MANAGED)
#define IMPLEMENTATION_STRING "CUDA - Managed Memory"
#else
#define IMPLEMENTATION_STRING "CUDA"
#endif
#define TBSIZE 1024
#define DOT_NUM_BLOCKS 256