From 383c5a3ae70fa767ae91382a8db9d59eab6561d4 Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Tue, 28 Feb 2017 10:00:44 +0100 Subject: [PATCH] all required operations implemented, errors are too large --- HCStream.cpp | 108 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 74 insertions(+), 34 deletions(-) diff --git a/HCStream.cpp b/HCStream.cpp index a91f6a3..6c59fe0 100644 --- a/HCStream.cpp +++ b/HCStream.cpp @@ -78,7 +78,49 @@ HCStream::~HCStream() template void HCStream::init_arrays(T _a, T _b, T _c) { - std::cout << __FILE__ << ":" << __LINE__ << "\t" << "initializing arrays\n"; + // hc::array_view view_a(this->d_a); + // hc::array_view view_b(this->d_b); + // hc::array_view view_c(this->d_c); + + // hc::completion_future future_a= hc::parallel_for_each(hc::extent<1>(array_size) + // , [=](hc::index<1> i) [[hc]] { + // view_a[i] = _a; + // }); + + // hc::completion_future future_b= hc::parallel_for_each(hc::extent<1>(array_size) + // , [=](hc::index<1> i) [[hc]] { + // view_b[i] = _b; + // }); + // hc::completion_future future_c= hc::parallel_for_each(hc::extent<1>(array_size) + // , [=](hc::index<1> i) [[hc]] { + // view_c[i] = _c; + // }); + // try{ + // future_a.wait(); + // } + // catch(std::exception& e){ + // std::cout << __FILE__ << ":" << __LINE__ << "\t future_a " << e.what() << std::endl; + // throw; + // } + + // try{ + // future_b.wait(); + // } + // catch(std::exception& e){ + // std::cout << __FILE__ << ":" << __LINE__ << "\t future_b " << e.what() << std::endl; + // throw; + // } + + + // try{ + // future_c.wait(); + // } + // catch(std::exception& e){ + // std::cout << __FILE__ << ":" << __LINE__ << "\t future_c " << e.what() << std::endl; + // throw; + // } + + std::vector temp(array_size,_a); hc::copy(temp.begin(), temp.end(),this->d_a); @@ -93,8 +135,8 @@ void HCStream::init_arrays(T _a, T _b, T _c) template void HCStream::read_arrays(std::vector& a, std::vector& b, std::vector& c) { + // Copy device memory to host - std::cout << __FILE__ << ":" << __LINE__ << "\t" << "read arrays\n"; hc::copy(d_a,a.begin()); hc::copy(d_b,b.begin()); hc::copy(d_c,c.begin()); @@ -105,20 +147,19 @@ template void HCStream::copy() { - std::cout << __FILE__ << ":" << __LINE__ << "\t" << "copy\n"; - hc::array& device_a = this->d_a; - hc::array& device_c = this->d_c; + hc::array_view view_a = this->d_a; + hc::array_view view_c = this->d_c; try{ // launch a GPU kernel to compute the saxpy in parallel hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size) - , [&](hc::index<1> index) [[hc]] { - device_c[index] = device_a[index]; + , [=](hc::index<1> index) [[hc]] { + view_c[index] = view_a[index]; }); future_kernel.wait(); } catch(std::exception& e){ - std::cout << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl; + std::cerr << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl; throw; } } @@ -126,21 +167,21 @@ void HCStream::copy() template void HCStream::mul() { - std::cout << __FILE__ << ":" << __LINE__ << "\t" << "mul\n"; + const T scalar = 0.3; - hc::array& device_b = this->d_b; - hc::array& device_c = this->d_c; + hc::array_view view_b = this->d_b; + hc::array_view view_c = this->d_c; try{ // launch a GPU kernel to compute the saxpy in parallel hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size) - , [&](hc::index<1> i) [[hc]] { - device_b[i] = scalar*device_c[i]; + , [=](hc::index<1> i) [[hc]] { + view_b[i] = scalar*view_c[i]; }); future_kernel.wait(); } catch(std::exception& e){ - std::cout << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl; + std::cerr << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl; throw; } } @@ -148,21 +189,22 @@ void HCStream::mul() template void HCStream::add() { - std::cout << __FILE__ << ":" << __LINE__ << "\t" << "add\n"; - hc::array& device_a = this->d_a; - hc::array& device_b = this->d_b; - hc::array& device_c = this->d_c; + + + hc::array_view view_a(this->d_a); + hc::array_view view_b(this->d_b); + hc::array_view view_c(this->d_c); try{ // launch a GPU kernel to compute the saxpy in parallel hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size) - , [&](hc::index<1> i) [[hc]] { - device_c[i] = device_a[i]+device_b[i]; + , [=](hc::index<1> i) [[hc]] { + view_c[i] = view_a[i]+view_b[i]; }); future_kernel.wait(); } catch(std::exception& e){ - std::cout << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl; + std::cerr << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl; throw; } } @@ -170,22 +212,22 @@ void HCStream::add() template void HCStream::triad() { - std::cout << __FILE__ << ":" << __LINE__ << "\t" << "triad\n"; + const T scalar = 0.3; - hc::array& device_a = this->d_a; - hc::array& device_b = this->d_b; - hc::array& device_c = this->d_c; + hc::array_view view_a(this->d_a); + hc::array_view view_b(this->d_b); + hc::array_view view_c(this->d_c); try{ // launch a GPU kernel to compute the saxpy in parallel hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size) - , [&](hc::index<1> i) [[hc]] { - device_a[i] = device_b[i] + scalar*device_c[i]; + , [=](hc::index<1> i) [[hc]] { + view_a[i] = view_b[i] + scalar*view_c[i]; }); future_kernel.wait(); } catch(std::exception& e){ - std::cout << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl; + std::cerr << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl; throw; } } @@ -193,7 +235,7 @@ void HCStream::triad() template T HCStream::dot() { - std::cout << __FILE__ << ":" << __LINE__ << "\t" << "dot\n"; + hc::array_view view_a(this->d_a); hc::array_view view_p(this->d_b); @@ -202,22 +244,20 @@ T HCStream::dot() try{ // launch a GPU kernel to compute the saxpy in parallel hc::completion_future future_kernel = hc::parallel_for_each(view_a.get_extent(), - [&](hc::index<1> i) [[hc]] { + [=](hc::index<1> i) [[hc]] { view_p[i] = view_p[i]*view_a[i]; }); future_kernel.wait(); } catch(std::exception& e){ - std::cout << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl; + std::cerr << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl; throw; } - std::cout << __FILE__ << ":" << __LINE__ << "\t" << "dot - for_each done\n"; + std::vector h_product(array_size,sum); hc::copy(view_p,h_product.begin()); - std::cout << __FILE__ << ":" << __LINE__ << "\t" << "dot - copy-out done\n"; - sum = std::accumulate(h_product.begin(), h_product.end(),sum); return sum;