fixed setting the accelerator, CMakeLists still needs some tweaks to not build OpenCL

This commit is contained in:
Peter Steinbach 2017-01-12 15:16:24 +01:00
parent 7621f86701
commit 47d2bf275f
2 changed files with 59 additions and 25 deletions

View File

@ -58,24 +58,6 @@ else()
message("Skipping HIP...") message("Skipping HIP...")
endif() endif()
#-------------------------------------------------------------------------------
# HCC
#-------------------------------------------------------------------------------
# Look for the `hcc` compiler driver, giving ${HCC_PATH}/bin as both a hint
# and an extra search path. The result is stored in HCC_BINARY.
find_program(HCC_BINARY hcc HINTS ${HCC_PATH}/bin PATHS ${HCC_PATH}/bin)

if(EXISTS ${HCC_BINARY})
  # Open question from the original author: can the compiler switch below be
  # tied to the gpu-stream-hc target only instead of being set as a variable?
  set(CMAKE_CXX_COMPILER ${HCC_BINARY})

  # HC flavour of the benchmark: common driver plus the HC stream
  # implementation, compiled with the HC preprocessor symbol defined.
  add_executable(gpu-stream-hc main.cpp HCStream.cpp)
  target_compile_definitions(gpu-stream-hc PUBLIC HC)

  # Compile and link flags are attached to the target itself rather than
  # appended to the global CMAKE_CXX_FLAGS / CMAKE_EXE_LINKER_FLAGS.
  set_property(TARGET gpu-stream-hc PROPERTY
    COMPILE_FLAGS "-hc -I /usr/include/c++/v1 -std=c++11 -stdlib=libc++ -I/opt/rocm/hcc-lc/include")
  set_property(TARGET gpu-stream-hc PROPERTY
    LINK_FLAGS "-hc -std=c++11 -L/opt/rocm/hcc-lc/lib -Wl,--rpath=/opt/rocm/hcc-lc/lib -lc++ -lc++abi -ldl -lpthread -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive -mcmodel=small")
else()
  # No usable hcc binary was located: leave the HC target out of the build.
  message("Skipping HC...")
endif()
#------------------------------------------------------------------------------- #-------------------------------------------------------------------------------
@ -110,6 +92,9 @@ else ()
message("Skipping OpenCL...") message("Skipping OpenCL...")
endif () endif ()
#------------------------------------------------------------------------------- #-------------------------------------------------------------------------------
# OpenACC # OpenACC
#------------------------------------------------------------------------------- #-------------------------------------------------------------------------------
@ -187,6 +172,37 @@ else()
message("Skipping Kokkos... (use -DKOKKOS_PATH=/path/to/kokkos to opt in)") message("Skipping Kokkos... (use -DKOKKOS_PATH=/path/to/kokkos to opt in)")
endif() endif()
#-------------------------------------------------------------------------------
# HCC
#-------------------------------------------------------------------------------
# Look for the `hcc` compiler driver, giving ${HCC_PATH}/bin as both a hint
# and an extra search path. The result is stored in HCC_BINARY.
find_program(HCC_BINARY hcc HINTS ${HCC_PATH}/bin PATHS ${HCC_PATH}/bin)

if(EXISTS "${HCC_BINARY}")
  #can the following be tied to the target only?
  # The previous compiler is remembered in OLD_CMAKE_CXX_COMPILER; nothing in
  # this section reads it back.
  set(OLD_CMAKE_CXX_COMPILER "${CMAKE_CXX_COMPILER}")
  set(CMAKE_CXX_COMPILER "${HCC_BINARY}")

  # HC flavour of the benchmark: common driver plus the HC stream
  # implementation, compiled with the HC preprocessor symbol defined.
  add_executable(gpu-stream-hc main.cpp HCStream.cpp)
  set_target_properties(gpu-stream-hc PROPERTIES
    COMPILE_FLAGS "-hc -I/usr/include/c++/v1 -std=c++11 -stdlib=libc++ -I/opt/rocm/hcc-lc/include"
    LINK_FLAGS "-hc -std=c++11 -L/opt/rocm/hcc-lc/lib -Wl,--rpath=/opt/rocm/hcc-lc/lib -lc++ -lc++abi -ldl -lpthread -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive -mcmodel=small"
  )
  target_compile_definitions(gpu-stream-hc PUBLIC HC)

  message("OpenCL magic: ${OpenCL_LIBRARY} ${OpenCL_INCLUDE_DIR}")

  # The OpenCL executable only exists when the OpenCL section created it.
  # Touching a missing target with set_property() is a hard configure error,
  # so the extra ROCm/libc++ flags are applied only when the target is present.
  if(TARGET gpu-stream-ocl)
    # COMPILE_FLAGS and LINK_FLAGS are plain strings, not lists: use
    # APPEND_STRING (with a leading space) so that flags already set on the
    # target are extended with a space instead of a ';' separator.
    set_property(TARGET gpu-stream-ocl APPEND_STRING PROPERTY
      COMPILE_FLAGS " -I/opt/rocm/opencl/include/ -stdlib=libc++ -I/opt/rocm/hcc-lc/include")
    set_property(TARGET gpu-stream-ocl APPEND_STRING PROPERTY
      LINK_FLAGS " -L/opt/rocm/opencl/lib/x86_64 -std=c++11 -L/opt/rocm/hcc-lc/lib -Wl,--rpath=/opt/rocm/hcc-lc/lib -lc++ -lc++abi ")
    # NOTE(review): the former `unset(gpu-stream-ocl)` here only unset a
    # variable of that name and had no effect on the target, so it was
    # dropped. Keeping the OpenCL executable out of an HCC build needs a
    # different mechanism - TODO decide which.
  endif()
else()
  # No usable hcc binary was located: leave the HC target out of the build.
  message("Skipping HC...")
endif()
#------------------------------------------------------------------------------- #-------------------------------------------------------------------------------
# SYCL # SYCL
#------------------------------------------------------------------------------- #-------------------------------------------------------------------------------

View File

@ -72,7 +72,9 @@ HCStream<T>::HCStream(const unsigned int ARRAY_SIZE, const int device_index):
// // Set device // // Set device
std::vector<hc::accelerator> accs = hc::accelerator::get_all(); std::vector<hc::accelerator> accs = hc::accelerator::get_all();
auto current = accs[device_index]; auto current = accs[device_index];
hc::accelerator::set_default(current.get_device_path());
std::cout << "Using HC device " << getDeviceName(current) << std::endl; std::cout << "Using HC device " << getDeviceName(current) << std::endl;
// // The array size must be divisible by TBSIZE for kernel launches // // The array size must be divisible by TBSIZE for kernel launches
@ -142,11 +144,15 @@ void HCStream<T>::read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<
template <class T> template <class T>
void HCStream<T>::copy() void HCStream<T>::copy()
{ {
hc::array<T,1>& device_a = this->d_a;
hc::array<T,1>& device_c = this->d_c;
try{ try{
// launch a GPU kernel to compute the saxpy in parallel // launch a GPU kernel to compute the saxpy in parallel
hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size) hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
, [&](hc::index<1> i) [[hc]] { , [&](hc::index<1> index) [[hc]] {
d_c[i] = d_a[i]; device_c[index] = device_a[index];
}); });
future_kernel.wait(); future_kernel.wait();
} }
@ -160,11 +166,14 @@ template <class T>
void HCStream<T>::mul() void HCStream<T>::mul()
{ {
const T scalar = 0.3; const T scalar = 0.3;
hc::array<T,1>& device_b = this->d_b;
hc::array<T,1>& device_c = this->d_c;
try{ try{
// launch a GPU kernel to compute the saxpy in parallel // launch a GPU kernel to compute the saxpy in parallel
hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size) hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
, [&](hc::index<1> i) [[hc]] { , [&](hc::index<1> i) [[hc]] {
d_b[i] = scalar*d_c[i]; device_b[i] = scalar*device_c[i];
}); });
future_kernel.wait(); future_kernel.wait();
} }
@ -177,11 +186,16 @@ void HCStream<T>::mul()
template <class T> template <class T>
void HCStream<T>::add() void HCStream<T>::add()
{ {
hc::array<T,1>& device_a = this->d_a;
hc::array<T,1>& device_b = this->d_b;
hc::array<T,1>& device_c = this->d_c;
try{ try{
// launch a GPU kernel to compute the saxpy in parallel // launch a GPU kernel to compute the saxpy in parallel
hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size) hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
, [&](hc::index<1> i) [[hc]] { , [&](hc::index<1> i) [[hc]] {
d_c[i] = d_a[i]+d_b[i]; device_c[i] = device_a[i]+device_b[i];
}); });
future_kernel.wait(); future_kernel.wait();
} }
@ -195,11 +209,15 @@ template <class T>
void HCStream<T>::triad() void HCStream<T>::triad()
{ {
const T scalar = 0.3; const T scalar = 0.3;
hc::array<T,1>& device_a = this->d_a;
hc::array<T,1>& device_b = this->d_b;
hc::array<T,1>& device_c = this->d_c;
try{ try{
// launch a GPU kernel to compute the saxpy in parallel // launch a GPU kernel to compute the saxpy in parallel
hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size) hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
, [&](hc::index<1> i) [[hc]] { , [&](hc::index<1> i) [[hc]] {
d_a[i] = d_b[i] + scalar*d_c[i]; device_a[i] = device_b[i] + scalar*device_c[i];
}); });
future_kernel.wait(); future_kernel.wait();
} }