From 0fc67226845819e8ef47166c790e81be2ae163f0 Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Mon, 27 Feb 2017 16:35:03 +0100 Subject: [PATCH] added Makefile and code for HC --- HC.make | 16 ++++++++ HCStream.cpp | 104 +++++++++++++++++++++++---------------------------- HCStream.h | 3 +- 3 files changed, 64 insertions(+), 59 deletions(-) create mode 100644 HC.make diff --git a/HC.make b/HC.make new file mode 100644 index 0000000..3ec099e --- /dev/null +++ b/HC.make @@ -0,0 +1,16 @@ + +# TODO: HC with HCC + +HCC = hcc + + + +CXXFLAGS+=-hc -stdlib=libc++ -I/opt/rocm/hcc-lc/include +LDFLAGS+=-hc -L/opt/rocm/hcc-lc/lib -Wl,--rpath=/opt/rocm/hcc-lc/lib -lc++ -lc++abi -ldl -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive + +hc-stream: main.cpp HCStream.cpp + $(HCC) $(CXXFLAGS) -DHC $^ $(LDFLAGS) $(EXTRA_FLAGS) -o $@ + +.PHONY: clean +clean: + rm -f hc-stream diff --git a/HCStream.cpp b/HCStream.cpp index 8cea1dc..2aa617c 100644 --- a/HCStream.cpp +++ b/HCStream.cpp @@ -7,10 +7,9 @@ #include #include #include - +#include #include "HCStream.h" -//#include "hc.hpp" #define TBSIZE 1024 @@ -25,7 +24,7 @@ void listDevices(void) { // Get number of devices std::vector accs = hc::accelerator::get_all(); - + // Print device names if (accs.empty()) { @@ -43,15 +42,6 @@ void listDevices(void) } } -// void check_error(void) -// { -// hipError_t err = hipGetLastError(); -// if (err != hipSuccess) -// { -// std::cerr << "Error: " << hipGetErrorString(err) << std::endl; -// exit(err); -// } -// } template HCStream::HCStream(const unsigned int ARRAY_SIZE, const int device_index): @@ -76,45 +66,7 @@ HCStream::HCStream(const unsigned int ARRAY_SIZE, const int device_index): hc::accelerator::set_default(current.get_device_path()); std::cout << "Using HC device " << getDeviceName(current) << std::endl; - - // // The array size must be divisible by TBSIZE for kernel launches - // if (ARRAY_SIZE % TBSIZE != 0) - // { - // std::stringstream ss; - // ss << "Array size must be a multiple of " << TBSIZE; - // throw std::runtime_error(ss.str()); - // } - // // Set device - // int count; - // hipGetDeviceCount(&count); - // check_error(); - // if (device_index >= count) - // throw std::runtime_error("Invalid device index"); - // hipSetDevice(device_index); - // check_error(); - - // // Print out device information - // std::cout << "Using HIP device " << getDeviceName(device_index) << std::endl; - // std::cout << "Driver: " << getDeviceDriver(device_index) << std::endl; - - // array_size = ARRAY_SIZE; - - // // Check buffers fit on the device - // hipDeviceProp_t props; - // hipGetDeviceProperties(&props, 0); - // if (props.totalGlobalMem < 3*ARRAY_SIZE*sizeof(T)) - // throw std::runtime_error("Device does not have enough memory for all 3 buffers"); - - // // Create device buffers - // hipMalloc(&d_a, ARRAY_SIZE*sizeof(T)); - // check_error(); - // hipMalloc(&d_b, ARRAY_SIZE*sizeof(T)); - // check_error(); - // hipMalloc(&d_c, ARRAY_SIZE*sizeof(T)); - // check_error(); - - } @@ -124,11 +76,17 @@ HCStream::~HCStream() } template -void HCStream::write_arrays(const std::vector& a, const std::vector& b, const std::vector& c) +void HCStream::init_arrays(T _a, T _b, T _c) { - hc::copy(a.cbegin(),a.cend(),d_a); - hc::copy(b.cbegin(),b.cend(),d_b); - hc::copy(c.cbegin(),c.cend(),d_c); + std::vector temp(array_size,_a); + hc::copy(temp.begin(), temp.end(),this->d_a); + + std::fill(temp.begin(), temp.end(),_b); + hc::copy(temp.begin(), temp.end(),this->d_b); + + std::fill(temp.begin(), temp.end(),_c); + hc::copy(temp.begin(), temp.end(),this->d_c); + } template @@ -157,7 +115,7 @@ void HCStream::copy() future_kernel.wait(); } catch(std::exception& e){ - std::cout << e.what() << std::endl; + std::cout << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl; throw; } } @@ -178,7 +136,7 @@ void HCStream::mul() future_kernel.wait(); } catch(std::exception& e){ - std::cout << e.what() << std::endl; + std::cout << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl; throw; } } @@ -200,7 +158,7 @@ void HCStream::add() future_kernel.wait(); } catch(std::exception& e){ - std::cout << e.what() << std::endl; + std::cout << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl; throw; } } @@ -222,10 +180,40 @@ void HCStream::triad() future_kernel.wait(); } catch(std::exception& e){ - std::cout << e.what() << std::endl; + std::cout << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl; throw; } } +template +T HCStream::dot() +{ + hc::array& device_a = this->d_a; + hc::array product = this->d_b; + + T sum = static_cast(0); + + try{ + // launch a GPU kernel to compute the saxpy in parallel + hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size) + , [&](hc::index<1> i) [[hc]] { + product[i] *= device_a[i]; + }); + future_kernel.wait(); + } + catch(std::exception& e){ + std::cout << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl; + throw; + } + + std::vector h_product(array_size,sum); + hc::copy(product,h_product.begin()); + + sum = std::accumulate(h_product.begin(), h_product.end(),sum); + + return sum; +} + + template class HCStream; template class HCStream; diff --git a/HCStream.h b/HCStream.h index a625be0..950b370 100644 --- a/HCStream.h +++ b/HCStream.h @@ -37,8 +37,9 @@ public: virtual void add() override; virtual void mul() override; virtual void triad() override; + virtual T dot() override; - virtual void write_arrays(const std::vector& a, const std::vector& b, const std::vector& c) override; + virtual void init_arrays(T initA, T initB, T initC) override; virtual void read_arrays(std::vector& a, std::vector& b, std::vector& c) override; };