diff --git a/HCStream.cpp b/HCStream.cpp index 2aa617c..a91f6a3 100644 --- a/HCStream.cpp +++ b/HCStream.cpp @@ -78,6 +78,7 @@ HCStream::~HCStream() template void HCStream::init_arrays(T _a, T _b, T _c) { + std::cout << __FILE__ << ":" << __LINE__ << "\t" << "initializing arrays\n"; std::vector temp(array_size,_a); hc::copy(temp.begin(), temp.end(),this->d_a); @@ -93,6 +94,7 @@ template void HCStream::read_arrays(std::vector& a, std::vector& b, std::vector& c) { // Copy device memory to host + std::cout << __FILE__ << ":" << __LINE__ << "\t" << "read arrays\n"; hc::copy(d_a,a.begin()); hc::copy(d_b,b.begin()); hc::copy(d_c,c.begin()); @@ -103,6 +105,7 @@ template void HCStream::copy() { + std::cout << __FILE__ << ":" << __LINE__ << "\t" << "copy\n"; hc::array& device_a = this->d_a; hc::array& device_c = this->d_c; @@ -123,6 +126,7 @@ void HCStream::copy() template void HCStream::mul() { + std::cout << __FILE__ << ":" << __LINE__ << "\t" << "mul\n"; const T scalar = 0.3; hc::array& device_b = this->d_b; hc::array& device_c = this->d_c; @@ -144,7 +148,7 @@ void HCStream::mul() template void HCStream::add() { - + std::cout << __FILE__ << ":" << __LINE__ << "\t" << "add\n"; hc::array& device_a = this->d_a; hc::array& device_b = this->d_b; hc::array& device_c = this->d_c; @@ -166,6 +170,7 @@ void HCStream::add() template void HCStream::triad() { + std::cout << __FILE__ << ":" << __LINE__ << "\t" << "triad\n"; const T scalar = 0.3; hc::array& device_a = this->d_a; hc::array& device_b = this->d_b; @@ -188,16 +193,17 @@ void HCStream::triad() template T HCStream::dot() { - hc::array& device_a = this->d_a; - hc::array product = this->d_b; + std::cout << __FILE__ << ":" << __LINE__ << "\t" << "dot\n"; + hc::array_view view_a(this->d_a); + hc::array_view view_p(this->d_b); T sum = static_cast(0); try{ // launch a GPU kernel to compute the saxpy in parallel - hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size) - , [&](hc::index<1> i) [[hc]] { - product[i] *= device_a[i]; + hc::completion_future future_kernel = hc::parallel_for_each(view_a.get_extent(), + [&](hc::index<1> i) [[hc]] { + view_p[i] = view_p[i]*view_a[i]; }); future_kernel.wait(); } @@ -206,8 +212,11 @@ T HCStream::dot() throw; } + std::cout << __FILE__ << ":" << __LINE__ << "\t" << "dot - for_each done\n"; std::vector h_product(array_size,sum); - hc::copy(product,h_product.begin()); + hc::copy(view_p,h_product.begin()); + + std::cout << __FILE__ << ":" << __LINE__ << "\t" << "dot - copy-out done\n"; sum = std::accumulate(h_product.begin(), h_product.end(),sum);