// Patch summary (commentary only; everything from the first "diff --git" header onward is unchanged).
//
// Purpose: add host<->device array transfer entry points to the Stream interface and its
// CUDA implementation, and exercise the host-to-device path plus copy() from main().
//
// src/CUDAStream.cu
//   * write_arrays(a, b, c): three cudaMemcpy calls with cudaMemcpyHostToDevice, copying
//     a/b/c into d_a/d_b/d_c. Each copy is x.size()*sizeof(T) bytes and is followed by
//     check_error().
//   * read_arrays(a, b, c): added with an EMPTY body.
//   * copy(): check_error() now follows the copy_kernel<<<1024, 1024>>> launch, then
//     cudaDeviceSynchronize() and a second check_error().
// src/CUDAStream.h
//   * Declares write_arrays and read_arrays on class CUDAStream.
// src/Stream.h
//   * Declares write_arrays and read_arrays as pure virtual (= 0) on class Stream, under the
//     comment "Copy memory between host and device"; one blank line is removed.
// src/main.cpp
//   * After constructing the CUDAStream, calls stream->write_arrays(a, b, c) and stream->copy().
//
// NOTE(review): this text looks extraction-damaged. The hunks are collapsed onto two
//   physical lines, and template argument lists appear stripped ("template" with no
//   parameter list, "std::vector&" with no element type, although the bodies use sizeof(T)).
//   Presumably the originals read "template <class T>" / "std::vector<T>" -- TODO confirm
//   against the repository before trying to apply this patch.
// NOTE(review): read_arrays is a stub, so calling it leaves the host vectors untouched.
//   Presumably device-to-host copies mirroring write_arrays are intended -- TODO confirm.
// NOTE(review): write_arrays takes each byte count from the host vector's size(). The
//   allocation size of d_a/d_b/d_c is not visible in these hunks; verify callers always
//   pass vectors no longer than the device buffers.
// NOTE(review): the launch configuration <<<1024, 1024>>> is hard-coded and the body of
//   copy_kernel is not visible here, so it is unclear how it relates to ARRAY_SIZE -- confirm.
// NOTE(review): check_error() is defined outside these hunks; its behaviour is not shown.
// NOTE(review): the context line "delete[] stream;" follows "stream = new CUDAStream(ARRAY_SIZE);",
//   which is a non-array new expression. Array delete on such a pointer is undefined
//   behaviour in C++; it looks like this should be "delete stream;". It also presumably
//   relies on Stream having a virtual destructor (declaration of stream not visible) -- verify.
diff --git a/src/CUDAStream.cu b/src/CUDAStream.cu index 66304d1..f325dfc 100644 --- a/src/CUDAStream.cu +++ b/src/CUDAStream.cu @@ -23,6 +23,23 @@ CUDAStream::CUDAStream(const unsigned int ARRAY_SIZE) check_error(); } + +template +void CUDAStream::write_arrays(const std::vector& a, const std::vector& b, const std::vector& c) +{ + // Copy host memory to device + cudaMemcpy(d_a, a.data(), a.size()*sizeof(T), cudaMemcpyHostToDevice); + check_error(); + cudaMemcpy(d_b, b.data(), b.size()*sizeof(T), cudaMemcpyHostToDevice); + check_error(); + cudaMemcpy(d_c, c.data(), c.size()*sizeof(T), cudaMemcpyHostToDevice); + check_error(); +} + +template +void CUDAStream::read_arrays(std::vector& a, std::vector& b, std::vector& c) +{ +} template __global__ void copy_kernel(const T * a, T * c) { @@ -34,6 +51,9 @@ template void CUDAStream::copy() { copy_kernel<<<1024, 1024>>>(d_a, d_c); + check_error(); + cudaDeviceSynchronize(); + check_error(); } template diff --git a/src/CUDAStream.h b/src/CUDAStream.h index 49e76df..881811e 100644 --- a/src/CUDAStream.h +++ b/src/CUDAStream.h @@ -22,4 +22,8 @@ class CUDAStream : public Stream void mul(); void triad(); + void write_arrays(const std::vector& a, const std::vector& b, const std::vector& c); + void read_arrays(std::vector& a, std::vector& b, std::vector& c); + }; + diff --git a/src/Stream.h b/src/Stream.h index 3a3e826..bb83d69 100644 --- a/src/Stream.h +++ b/src/Stream.h @@ -14,7 +14,9 @@ class Stream virtual void add() = 0; virtual void triad() = 0; - + // Copy memory between host and device + virtual void write_arrays(const std::vector& a, const std::vector& b, const std::vector& c) = 0; + virtual void read_arrays(std::vector& a, std::vector& b, std::vector& c) = 0; // Implementation specific device functions static std::vector getDeviceList(); diff --git a/src/main.cpp b/src/main.cpp index a293eb7..cfb8884 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -29,6 +29,8 @@ int main(int argc, char *argv[]) // Use the CUDA 
implementation stream = new CUDAStream(ARRAY_SIZE); + stream->write_arrays(a, b, c); + stream->copy(); delete[] stream;