added Makefile and code for HC
This commit is contained in:
parent
62ea5e3ed6
commit
0fc6722684
16
HC.make
Normal file
16
HC.make
Normal file
@ -0,0 +1,16 @@
|
||||
|
||||
# TODO: HC with HCC

# Compiler driver used to build and link the HC version.
HCC = hcc

# Installation prefix of the HCC toolchain. The default matches the path that
# was previously hard-coded; override it when HCC lives elsewhere, e.g.
#   make -f HC.make HCC_PATH=/path/to/hcc-lc
HCC_PATH ?= /opt/rocm/hcc-lc

CXXFLAGS+=-hc -stdlib=libc++ -I$(HCC_PATH)/include
LDFLAGS+=-hc -L$(HCC_PATH)/lib -Wl,--rpath=$(HCC_PATH)/lib -lc++ -lc++abi -ldl -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive

# Compile and link both sources in one step with the HC macro defined.
# EXTRA_FLAGS lets the caller append additional options to the command line.
hc-stream: main.cpp HCStream.cpp
	$(HCC) $(CXXFLAGS) -DHC $^ $(LDFLAGS) $(EXTRA_FLAGS) -o $@

.PHONY: clean
clean:
	rm -f hc-stream
|
||||
102
HCStream.cpp
102
HCStream.cpp
@ -7,10 +7,9 @@
|
||||
#include <codecvt>
|
||||
#include <vector>
|
||||
#include <locale>
|
||||
|
||||
#include <numeric>
|
||||
|
||||
#include "HCStream.h"
|
||||
//#include "hc.hpp"
|
||||
|
||||
#define TBSIZE 1024
|
||||
|
||||
@ -43,15 +42,6 @@ void listDevices(void)
|
||||
}
|
||||
}
|
||||
|
||||
// void check_error(void)
|
||||
// {
|
||||
// hipError_t err = hipGetLastError();
|
||||
// if (err != hipSuccess)
|
||||
// {
|
||||
// std::cerr << "Error: " << hipGetErrorString(err) << std::endl;
|
||||
// exit(err);
|
||||
// }
|
||||
// }
|
||||
|
||||
template <class T>
|
||||
HCStream<T>::HCStream(const unsigned int ARRAY_SIZE, const int device_index):
|
||||
@ -77,44 +67,6 @@ HCStream<T>::HCStream(const unsigned int ARRAY_SIZE, const int device_index):
|
||||
|
||||
std::cout << "Using HC device " << getDeviceName(current) << std::endl;
|
||||
|
||||
// // The array size must be divisible by TBSIZE for kernel launches
|
||||
// if (ARRAY_SIZE % TBSIZE != 0)
|
||||
// {
|
||||
// std::stringstream ss;
|
||||
// ss << "Array size must be a multiple of " << TBSIZE;
|
||||
// throw std::runtime_error(ss.str());
|
||||
// }
|
||||
|
||||
// // Set device
|
||||
// int count;
|
||||
// hipGetDeviceCount(&count);
|
||||
// check_error();
|
||||
// if (device_index >= count)
|
||||
// throw std::runtime_error("Invalid device index");
|
||||
// hipSetDevice(device_index);
|
||||
// check_error();
|
||||
|
||||
// // Print out device information
|
||||
// std::cout << "Using HIP device " << getDeviceName(device_index) << std::endl;
|
||||
// std::cout << "Driver: " << getDeviceDriver(device_index) << std::endl;
|
||||
|
||||
// array_size = ARRAY_SIZE;
|
||||
|
||||
// // Check buffers fit on the device
|
||||
// hipDeviceProp_t props;
|
||||
// hipGetDeviceProperties(&props, 0);
|
||||
// if (props.totalGlobalMem < 3*ARRAY_SIZE*sizeof(T))
|
||||
// throw std::runtime_error("Device does not have enough memory for all 3 buffers");
|
||||
|
||||
// // Create device buffers
|
||||
// hipMalloc(&d_a, ARRAY_SIZE*sizeof(T));
|
||||
// check_error();
|
||||
// hipMalloc(&d_b, ARRAY_SIZE*sizeof(T));
|
||||
// check_error();
|
||||
// hipMalloc(&d_c, ARRAY_SIZE*sizeof(T));
|
||||
// check_error();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -124,11 +76,17 @@ HCStream<T>::~HCStream()
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void HCStream<T>::write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c)
|
||||
void HCStream<T>::init_arrays(T _a, T _b, T _c)
|
||||
{
|
||||
hc::copy(a.cbegin(),a.cend(),d_a);
|
||||
hc::copy(b.cbegin(),b.cend(),d_b);
|
||||
hc::copy(c.cbegin(),c.cend(),d_c);
|
||||
std::vector<T> temp(array_size,_a);
|
||||
hc::copy(temp.begin(), temp.end(),this->d_a);
|
||||
|
||||
std::fill(temp.begin(), temp.end(),_b);
|
||||
hc::copy(temp.begin(), temp.end(),this->d_b);
|
||||
|
||||
std::fill(temp.begin(), temp.end(),_c);
|
||||
hc::copy(temp.begin(), temp.end(),this->d_c);
|
||||
|
||||
}
|
||||
|
||||
template <class T>
|
||||
@ -157,7 +115,7 @@ void HCStream<T>::copy()
|
||||
future_kernel.wait();
|
||||
}
|
||||
catch(std::exception& e){
|
||||
std::cout << e.what() << std::endl;
|
||||
std::cout << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
@ -178,7 +136,7 @@ void HCStream<T>::mul()
|
||||
future_kernel.wait();
|
||||
}
|
||||
catch(std::exception& e){
|
||||
std::cout << e.what() << std::endl;
|
||||
std::cout << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
@ -200,7 +158,7 @@ void HCStream<T>::add()
|
||||
future_kernel.wait();
|
||||
}
|
||||
catch(std::exception& e){
|
||||
std::cout << e.what() << std::endl;
|
||||
std::cout << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
@ -222,10 +180,40 @@ void HCStream<T>::triad()
|
||||
future_kernel.wait();
|
||||
}
|
||||
catch(std::exception& e){
|
||||
std::cout << e.what() << std::endl;
|
||||
std::cout << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
T HCStream<T>::dot()
|
||||
{
|
||||
hc::array<T,1>& device_a = this->d_a;
|
||||
hc::array<T,1> product = this->d_b;
|
||||
|
||||
T sum = static_cast<T>(0);
|
||||
|
||||
try{
|
||||
// launch a GPU kernel to compute the saxpy in parallel
|
||||
hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
|
||||
, [&](hc::index<1> i) [[hc]] {
|
||||
product[i] *= device_a[i];
|
||||
});
|
||||
future_kernel.wait();
|
||||
}
|
||||
catch(std::exception& e){
|
||||
std::cout << __FILE__ << ":" << __LINE__ << "\t" << e.what() << std::endl;
|
||||
throw;
|
||||
}
|
||||
|
||||
std::vector<T> h_product(array_size,sum);
|
||||
hc::copy(product,h_product.begin());
|
||||
|
||||
sum = std::accumulate(h_product.begin(), h_product.end(),sum);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
|
||||
template class HCStream<float>;
|
||||
template class HCStream<double>;
|
||||
|
||||
@ -37,8 +37,9 @@ public:
|
||||
virtual void add() override;
|
||||
virtual void mul() override;
|
||||
virtual void triad() override;
|
||||
virtual T dot() override;
|
||||
|
||||
virtual void write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c) override;
|
||||
virtual void init_arrays(T initA, T initB, T initC) override;
|
||||
virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
|
||||
|
||||
};
|
||||
|
||||
Loading…
Reference in New Issue
Block a user