[CUDA] Add Managed memory and Page fault options
To use managed memory, compile the code defining MANAGED To use CUDA 8 page-fault memory, compile the code defining PAGEFAULT
This commit is contained in:
parent
b9c514fd9b
commit
62860284b2
@ -55,6 +55,21 @@ CUDAStream<T>::CUDAStream(const unsigned int ARRAY_SIZE, const int device_index)
|
|||||||
throw std::runtime_error("Device does not have enough memory for all 3 buffers");
|
throw std::runtime_error("Device does not have enough memory for all 3 buffers");
|
||||||
|
|
||||||
// Create device buffers
|
// Create device buffers
|
||||||
|
#if defined(MANAGED)
|
||||||
|
cudaMallocManaged(&d_a, ARRAY_SIZE*sizeof(T));
|
||||||
|
check_error();
|
||||||
|
cudaMallocManaged(&d_b, ARRAY_SIZE*sizeof(T));
|
||||||
|
check_error();
|
||||||
|
cudaMallocManaged(&d_c, ARRAY_SIZE*sizeof(T));
|
||||||
|
check_error();
|
||||||
|
cudaMallocManaged(&d_sum, DOT_NUM_BLOCKS*sizeof(T));
|
||||||
|
check_error();
|
||||||
|
#elif defined(PAGEFAULT)
|
||||||
|
d_a = (T*)malloc(sizeof(T)*ARRAY_SIZE);
|
||||||
|
d_b = (T*)malloc(sizeof(T)*ARRAY_SIZE);
|
||||||
|
d_c = (T*)malloc(sizeof(T)*ARRAY_SIZE);
|
||||||
|
d_sum = (T*)malloc(sizeof(T)*DOT_NUM_BLOCKS);
|
||||||
|
#else
|
||||||
cudaMalloc(&d_a, ARRAY_SIZE*sizeof(T));
|
cudaMalloc(&d_a, ARRAY_SIZE*sizeof(T));
|
||||||
check_error();
|
check_error();
|
||||||
cudaMalloc(&d_b, ARRAY_SIZE*sizeof(T));
|
cudaMalloc(&d_b, ARRAY_SIZE*sizeof(T));
|
||||||
@ -63,6 +78,7 @@ CUDAStream<T>::CUDAStream(const unsigned int ARRAY_SIZE, const int device_index)
|
|||||||
check_error();
|
check_error();
|
||||||
cudaMalloc(&d_sum, DOT_NUM_BLOCKS*sizeof(T));
|
cudaMalloc(&d_sum, DOT_NUM_BLOCKS*sizeof(T));
|
||||||
check_error();
|
check_error();
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -71,6 +87,12 @@ CUDAStream<T>::~CUDAStream()
|
|||||||
{
|
{
|
||||||
free(sums);
|
free(sums);
|
||||||
|
|
||||||
|
#if defined(PAGEFAULT)
|
||||||
|
free(d_a);
|
||||||
|
free(d_b);
|
||||||
|
free(d_c);
|
||||||
|
free(d_sum);
|
||||||
|
#else
|
||||||
cudaFree(d_a);
|
cudaFree(d_a);
|
||||||
check_error();
|
check_error();
|
||||||
cudaFree(d_b);
|
cudaFree(d_b);
|
||||||
@ -79,6 +101,7 @@ CUDAStream<T>::~CUDAStream()
|
|||||||
check_error();
|
check_error();
|
||||||
cudaFree(d_sum);
|
cudaFree(d_sum);
|
||||||
check_error();
|
check_error();
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -104,12 +127,22 @@ template <class T>
|
|||||||
void CUDAStream<T>::read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c)
|
void CUDAStream<T>::read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c)
|
||||||
{
|
{
|
||||||
// Copy device memory to host
|
// Copy device memory to host
|
||||||
|
#if defined(PAGEFAULT) || defined(MANAGED)
|
||||||
|
cudaDeviceSynchronize();
|
||||||
|
for (int i = 0; i < array_size; i++)
|
||||||
|
{
|
||||||
|
a[i] = d_a[i];
|
||||||
|
b[i] = d_b[i];
|
||||||
|
c[i] = d_c[i];
|
||||||
|
}
|
||||||
|
#else
|
||||||
cudaMemcpy(a.data(), d_a, a.size()*sizeof(T), cudaMemcpyDeviceToHost);
|
cudaMemcpy(a.data(), d_a, a.size()*sizeof(T), cudaMemcpyDeviceToHost);
|
||||||
check_error();
|
check_error();
|
||||||
cudaMemcpy(b.data(), d_b, b.size()*sizeof(T), cudaMemcpyDeviceToHost);
|
cudaMemcpy(b.data(), d_b, b.size()*sizeof(T), cudaMemcpyDeviceToHost);
|
||||||
check_error();
|
check_error();
|
||||||
cudaMemcpy(c.data(), d_c, c.size()*sizeof(T), cudaMemcpyDeviceToHost);
|
cudaMemcpy(c.data(), d_c, c.size()*sizeof(T), cudaMemcpyDeviceToHost);
|
||||||
check_error();
|
check_error();
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -13,7 +13,13 @@
|
|||||||
|
|
||||||
#include "Stream.h"
|
#include "Stream.h"
|
||||||
|
|
||||||
#define IMPLEMENTATION_STRING "CUDA"
|
#if defined(PAGEFAULT)
|
||||||
|
#define IMPLEMENTATION_STRING "CUDA - Page Fault"
|
||||||
|
#elif defined(MANAGED)
|
||||||
|
#define IMPLEMENTATION_STRING "CUDA - Managed Memory"
|
||||||
|
#else
|
||||||
|
#define IMPLEMENTATION_STRING "CUDA"
|
||||||
|
#endif
|
||||||
|
|
||||||
#define TBSIZE 1024
|
#define TBSIZE 1024
|
||||||
#define DOT_NUM_BLOCKS 256
|
#define DOT_NUM_BLOCKS 256
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user