fixed setting the accelerator, CMakeLists still needs some tweaks to not build OpenCL
This commit is contained in:
parent
7621f86701
commit
47d2bf275f
@ -58,24 +58,6 @@ else()
|
||||
message("Skipping HIP...")
|
||||
endif()
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# HCC
|
||||
#-------------------------------------------------------------------------------
|
||||
find_program(HCC_BINARY hcc HINTS ${HCC_PATH}/bin PATHS ${HCC_PATH}/bin)
|
||||
if(EXISTS ${HCC_BINARY})
|
||||
#can the following be tied to the target only?
|
||||
set(CMAKE_CXX_COMPILER ${HCC_BINARY})
|
||||
# list(APPEND CMAKE_CXX_FLAGS -hc -I /usr/include/c++/v1 -std=c++11 -stdlib=libc++ -I/opt/rocm/hcc-lc/include)
|
||||
# list(APPEND CMAKE_EXE_LINKER_FLAGS -hc -std=c++11 -L/opt/rocm/hcc-lc/lib -Wl,--rpath=/opt/rocm/hcc-lc/lib -lc++ -lc++abi -ldl -lpthread -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive -mcmodel=small )
|
||||
add_executable(gpu-stream-hc main.cpp HCStream.cpp)
|
||||
set_target_properties(gpu-stream-hc PROPERTIES
|
||||
COMPILE_FLAGS "-hc -I /usr/include/c++/v1 -std=c++11 -stdlib=libc++ -I/opt/rocm/hcc-lc/include"
|
||||
LINK_FLAGS "-hc -std=c++11 -L/opt/rocm/hcc-lc/lib -Wl,--rpath=/opt/rocm/hcc-lc/lib -lc++ -lc++abi -ldl -lpthread -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive -mcmodel=small"
|
||||
)
|
||||
target_compile_definitions(gpu-stream-hc PUBLIC HC)
|
||||
else()
|
||||
message("Skipping HC...")
|
||||
endif()
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
@ -110,6 +92,9 @@ else ()
|
||||
message("Skipping OpenCL...")
|
||||
endif ()
|
||||
|
||||
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# OpenACC
|
||||
#-------------------------------------------------------------------------------
|
||||
@ -187,6 +172,37 @@ else()
|
||||
message("Skipping Kokkos... (use -DKOKKOS_PATH=/path/to/kokkos to opt in)")
|
||||
endif()
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# HCC
|
||||
#-------------------------------------------------------------------------------
|
||||
find_program(HCC_BINARY hcc HINTS ${HCC_PATH}/bin PATHS ${HCC_PATH}/bin)
|
||||
if(EXISTS ${HCC_BINARY})
|
||||
#can the following be tied to the target only?
|
||||
set(OLD_CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER})
|
||||
set(CMAKE_CXX_COMPILER ${HCC_BINARY})
|
||||
|
||||
# list(APPEND CMAKE_CXX_FLAGS -hc -I /usr/include/c++/v1 -std=c++11 -stdlib=libc++ -I/opt/rocm/hcc-lc/include)
|
||||
# list(APPEND CMAKE_EXE_LINKER_FLAGS -hc -std=c++11 -L/opt/rocm/hcc-lc/lib -Wl,--rpath=/opt/rocm/hcc-lc/lib -lc++ -lc++abi -ldl -lpthread -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive -mcmodel=small )
|
||||
add_executable(gpu-stream-hc main.cpp HCStream.cpp)
|
||||
set_target_properties(gpu-stream-hc PROPERTIES
|
||||
COMPILE_FLAGS "-hc -I/usr/include/c++/v1 -std=c++11 -stdlib=libc++ -I/opt/rocm/hcc-lc/include"
|
||||
LINK_FLAGS "-hc -std=c++11 -L/opt/rocm/hcc-lc/lib -Wl,--rpath=/opt/rocm/hcc-lc/lib -lc++ -lc++abi -ldl -lpthread -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive -mcmodel=small"
|
||||
)
|
||||
message("OpenCL magic: ${OpenCL_LIBRARY} ${OpenCL_INCLUDE_DIR}")
|
||||
set_property(TARGET gpu-stream-ocl APPEND PROPERTY COMPILE_FLAGS "-I/opt/rocm/opencl/include/ -stdlib=libc++ -I/opt/rocm/hcc-lc/include")
|
||||
set_property(TARGET gpu-stream-ocl APPEND PROPERTY LINK_FLAGS "-L/opt/rocm/opencl/lib/x86_64 -std=c++11 -L/opt/rocm/hcc-lc/lib -Wl,--rpath=/opt/rocm/hcc-lc/lib -lc++ -lc++abi ")
|
||||
# set_target_properties(gpu-stream-ocl PROPERTIES
|
||||
# # COMPILE_FLAGS "-I/opt/rocm/opencl/include/opencl1.2"
|
||||
# LINK_FLAGS "-I /opt/rocm/opencl/include/opencl1.2 -L /opt/rocm/opencl/lib/x86_64 -lOpenCL -std=c++11"
|
||||
# )
|
||||
target_compile_definitions(gpu-stream-hc PUBLIC HC)
|
||||
if(TARGET gpu-stream-ocl)
|
||||
unset(gpu-stream-ocl)
|
||||
endif()
|
||||
else()
|
||||
message("Skipping HC...")
|
||||
endif()
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# SYCL
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
32
HCStream.cpp
32
HCStream.cpp
@ -72,7 +72,9 @@ HCStream<T>::HCStream(const unsigned int ARRAY_SIZE, const int device_index):
|
||||
// // Set device
|
||||
std::vector<hc::accelerator> accs = hc::accelerator::get_all();
|
||||
auto current = accs[device_index];
|
||||
|
||||
|
||||
hc::accelerator::set_default(current.get_device_path());
|
||||
|
||||
std::cout << "Using HC device " << getDeviceName(current) << std::endl;
|
||||
|
||||
// // The array size must be divisible by TBSIZE for kernel launches
|
||||
@ -142,11 +144,15 @@ void HCStream<T>::read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<
|
||||
template <class T>
|
||||
void HCStream<T>::copy()
|
||||
{
|
||||
|
||||
hc::array<T,1>& device_a = this->d_a;
|
||||
hc::array<T,1>& device_c = this->d_c;
|
||||
|
||||
try{
|
||||
// launch a GPU kernel to compute the saxpy in parallel
|
||||
// launch a GPU kernel to compute the saxpy in parallel
|
||||
hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
|
||||
, [&](hc::index<1> i) [[hc]] {
|
||||
d_c[i] = d_a[i];
|
||||
, [&](hc::index<1> index) [[hc]] {
|
||||
device_c[index] = device_a[index];
|
||||
});
|
||||
future_kernel.wait();
|
||||
}
|
||||
@ -160,11 +166,14 @@ template <class T>
|
||||
void HCStream<T>::mul()
|
||||
{
|
||||
const T scalar = 0.3;
|
||||
hc::array<T,1>& device_b = this->d_b;
|
||||
hc::array<T,1>& device_c = this->d_c;
|
||||
|
||||
try{
|
||||
// launch a GPU kernel to compute the saxpy in parallel
|
||||
hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
|
||||
, [&](hc::index<1> i) [[hc]] {
|
||||
d_b[i] = scalar*d_c[i];
|
||||
device_b[i] = scalar*device_c[i];
|
||||
});
|
||||
future_kernel.wait();
|
||||
}
|
||||
@ -177,11 +186,16 @@ void HCStream<T>::mul()
|
||||
template <class T>
|
||||
void HCStream<T>::add()
|
||||
{
|
||||
|
||||
hc::array<T,1>& device_a = this->d_a;
|
||||
hc::array<T,1>& device_b = this->d_b;
|
||||
hc::array<T,1>& device_c = this->d_c;
|
||||
|
||||
try{
|
||||
// launch a GPU kernel to compute the saxpy in parallel
|
||||
hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
|
||||
, [&](hc::index<1> i) [[hc]] {
|
||||
d_c[i] = d_a[i]+d_b[i];
|
||||
device_c[i] = device_a[i]+device_b[i];
|
||||
});
|
||||
future_kernel.wait();
|
||||
}
|
||||
@ -195,11 +209,15 @@ template <class T>
|
||||
void HCStream<T>::triad()
|
||||
{
|
||||
const T scalar = 0.3;
|
||||
hc::array<T,1>& device_a = this->d_a;
|
||||
hc::array<T,1>& device_b = this->d_b;
|
||||
hc::array<T,1>& device_c = this->d_c;
|
||||
|
||||
try{
|
||||
// launch a GPU kernel to compute the saxpy in parallel
|
||||
hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
|
||||
, [&](hc::index<1> i) [[hc]] {
|
||||
d_a[i] = d_b[i] + scalar*d_c[i];
|
||||
device_a[i] = device_b[i] + scalar*device_c[i];
|
||||
});
|
||||
future_kernel.wait();
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user