fixed setting the accelerator, CMakeLists still needs some tweaks to not build OpenCL
This commit is contained in:
parent
7621f86701
commit
47d2bf275f
@ -58,24 +58,6 @@ else()
|
|||||||
message("Skipping HIP...")
|
message("Skipping HIP...")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
# HCC
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
find_program(HCC_BINARY hcc HINTS ${HCC_PATH}/bin PATHS ${HCC_PATH}/bin)
|
|
||||||
if(EXISTS ${HCC_BINARY})
|
|
||||||
#can the following be tied to the target only?
|
|
||||||
set(CMAKE_CXX_COMPILER ${HCC_BINARY})
|
|
||||||
# list(APPEND CMAKE_CXX_FLAGS -hc -I /usr/include/c++/v1 -std=c++11 -stdlib=libc++ -I/opt/rocm/hcc-lc/include)
|
|
||||||
# list(APPEND CMAKE_EXE_LINKER_FLAGS -hc -std=c++11 -L/opt/rocm/hcc-lc/lib -Wl,--rpath=/opt/rocm/hcc-lc/lib -lc++ -lc++abi -ldl -lpthread -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive -mcmodel=small )
|
|
||||||
add_executable(gpu-stream-hc main.cpp HCStream.cpp)
|
|
||||||
set_target_properties(gpu-stream-hc PROPERTIES
|
|
||||||
COMPILE_FLAGS "-hc -I /usr/include/c++/v1 -std=c++11 -stdlib=libc++ -I/opt/rocm/hcc-lc/include"
|
|
||||||
LINK_FLAGS "-hc -std=c++11 -L/opt/rocm/hcc-lc/lib -Wl,--rpath=/opt/rocm/hcc-lc/lib -lc++ -lc++abi -ldl -lpthread -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive -mcmodel=small"
|
|
||||||
)
|
|
||||||
target_compile_definitions(gpu-stream-hc PUBLIC HC)
|
|
||||||
else()
|
|
||||||
message("Skipping HC...")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
#-------------------------------------------------------------------------------
|
||||||
@ -110,6 +92,9 @@ else ()
|
|||||||
message("Skipping OpenCL...")
|
message("Skipping OpenCL...")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
#-------------------------------------------------------------------------------
|
||||||
# OpenACC
|
# OpenACC
|
||||||
#-------------------------------------------------------------------------------
|
#-------------------------------------------------------------------------------
|
||||||
@ -187,6 +172,37 @@ else()
|
|||||||
message("Skipping Kokkos... (use -DKOKKOS_PATH=/path/to/kokkos to opt in)")
|
message("Skipping Kokkos... (use -DKOKKOS_PATH=/path/to/kokkos to opt in)")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
#-------------------------------------------------------------------------------
|
||||||
|
# HCC
|
||||||
|
#-------------------------------------------------------------------------------
|
||||||
|
find_program(HCC_BINARY hcc HINTS ${HCC_PATH}/bin PATHS ${HCC_PATH}/bin)
|
||||||
|
if(EXISTS ${HCC_BINARY})
|
||||||
|
#can the following be tied to the target only?
|
||||||
|
set(OLD_CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER})
|
||||||
|
set(CMAKE_CXX_COMPILER ${HCC_BINARY})
|
||||||
|
|
||||||
|
# list(APPEND CMAKE_CXX_FLAGS -hc -I /usr/include/c++/v1 -std=c++11 -stdlib=libc++ -I/opt/rocm/hcc-lc/include)
|
||||||
|
# list(APPEND CMAKE_EXE_LINKER_FLAGS -hc -std=c++11 -L/opt/rocm/hcc-lc/lib -Wl,--rpath=/opt/rocm/hcc-lc/lib -lc++ -lc++abi -ldl -lpthread -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive -mcmodel=small )
|
||||||
|
add_executable(gpu-stream-hc main.cpp HCStream.cpp)
|
||||||
|
set_target_properties(gpu-stream-hc PROPERTIES
|
||||||
|
COMPILE_FLAGS "-hc -I/usr/include/c++/v1 -std=c++11 -stdlib=libc++ -I/opt/rocm/hcc-lc/include"
|
||||||
|
LINK_FLAGS "-hc -std=c++11 -L/opt/rocm/hcc-lc/lib -Wl,--rpath=/opt/rocm/hcc-lc/lib -lc++ -lc++abi -ldl -lpthread -Wl,--whole-archive -lmcwamp -Wl,--no-whole-archive -mcmodel=small"
|
||||||
|
)
|
||||||
|
message("OpenCL magic: ${OpenCL_LIBRARY} ${OpenCL_INCLUDE_DIR}")
|
||||||
|
set_property(TARGET gpu-stream-ocl APPEND PROPERTY COMPILE_FLAGS "-I/opt/rocm/opencl/include/ -stdlib=libc++ -I/opt/rocm/hcc-lc/include")
|
||||||
|
set_property(TARGET gpu-stream-ocl APPEND PROPERTY LINK_FLAGS "-L/opt/rocm/opencl/lib/x86_64 -std=c++11 -L/opt/rocm/hcc-lc/lib -Wl,--rpath=/opt/rocm/hcc-lc/lib -lc++ -lc++abi ")
|
||||||
|
# set_target_properties(gpu-stream-ocl PROPERTIES
|
||||||
|
# # COMPILE_FLAGS "-I/opt/rocm/opencl/include/opencl1.2"
|
||||||
|
# LINK_FLAGS "-I /opt/rocm/opencl/include/opencl1.2 -L /opt/rocm/opencl/lib/x86_64 -lOpenCL -std=c++11"
|
||||||
|
# )
|
||||||
|
target_compile_definitions(gpu-stream-hc PUBLIC HC)
|
||||||
|
if(TARGET gpu-stream-ocl)
|
||||||
|
unset(gpu-stream-ocl)
|
||||||
|
endif()
|
||||||
|
else()
|
||||||
|
message("Skipping HC...")
|
||||||
|
endif()
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
#-------------------------------------------------------------------------------
|
||||||
# SYCL
|
# SYCL
|
||||||
#-------------------------------------------------------------------------------
|
#-------------------------------------------------------------------------------
|
||||||
|
|||||||
28
HCStream.cpp
28
HCStream.cpp
@ -73,6 +73,8 @@ HCStream<T>::HCStream(const unsigned int ARRAY_SIZE, const int device_index):
|
|||||||
std::vector<hc::accelerator> accs = hc::accelerator::get_all();
|
std::vector<hc::accelerator> accs = hc::accelerator::get_all();
|
||||||
auto current = accs[device_index];
|
auto current = accs[device_index];
|
||||||
|
|
||||||
|
hc::accelerator::set_default(current.get_device_path());
|
||||||
|
|
||||||
std::cout << "Using HC device " << getDeviceName(current) << std::endl;
|
std::cout << "Using HC device " << getDeviceName(current) << std::endl;
|
||||||
|
|
||||||
// // The array size must be divisible by TBSIZE for kernel launches
|
// // The array size must be divisible by TBSIZE for kernel launches
|
||||||
@ -142,11 +144,15 @@ void HCStream<T>::read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<
|
|||||||
template <class T>
|
template <class T>
|
||||||
void HCStream<T>::copy()
|
void HCStream<T>::copy()
|
||||||
{
|
{
|
||||||
|
|
||||||
|
hc::array<T,1>& device_a = this->d_a;
|
||||||
|
hc::array<T,1>& device_c = this->d_c;
|
||||||
|
|
||||||
try{
|
try{
|
||||||
// launch a GPU kernel to compute the saxpy in parallel
|
// launch a GPU kernel to compute the saxpy in parallel
|
||||||
hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
|
hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
|
||||||
, [&](hc::index<1> i) [[hc]] {
|
, [&](hc::index<1> index) [[hc]] {
|
||||||
d_c[i] = d_a[i];
|
device_c[index] = device_a[index];
|
||||||
});
|
});
|
||||||
future_kernel.wait();
|
future_kernel.wait();
|
||||||
}
|
}
|
||||||
@ -160,11 +166,14 @@ template <class T>
|
|||||||
void HCStream<T>::mul()
|
void HCStream<T>::mul()
|
||||||
{
|
{
|
||||||
const T scalar = 0.3;
|
const T scalar = 0.3;
|
||||||
|
hc::array<T,1>& device_b = this->d_b;
|
||||||
|
hc::array<T,1>& device_c = this->d_c;
|
||||||
|
|
||||||
try{
|
try{
|
||||||
// launch a GPU kernel to compute the saxpy in parallel
|
// launch a GPU kernel to compute the saxpy in parallel
|
||||||
hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
|
hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
|
||||||
, [&](hc::index<1> i) [[hc]] {
|
, [&](hc::index<1> i) [[hc]] {
|
||||||
d_b[i] = scalar*d_c[i];
|
device_b[i] = scalar*device_c[i];
|
||||||
});
|
});
|
||||||
future_kernel.wait();
|
future_kernel.wait();
|
||||||
}
|
}
|
||||||
@ -177,11 +186,16 @@ void HCStream<T>::mul()
|
|||||||
template <class T>
|
template <class T>
|
||||||
void HCStream<T>::add()
|
void HCStream<T>::add()
|
||||||
{
|
{
|
||||||
|
|
||||||
|
hc::array<T,1>& device_a = this->d_a;
|
||||||
|
hc::array<T,1>& device_b = this->d_b;
|
||||||
|
hc::array<T,1>& device_c = this->d_c;
|
||||||
|
|
||||||
try{
|
try{
|
||||||
// launch a GPU kernel to compute the saxpy in parallel
|
// launch a GPU kernel to compute the saxpy in parallel
|
||||||
hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
|
hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
|
||||||
, [&](hc::index<1> i) [[hc]] {
|
, [&](hc::index<1> i) [[hc]] {
|
||||||
d_c[i] = d_a[i]+d_b[i];
|
device_c[i] = device_a[i]+device_b[i];
|
||||||
});
|
});
|
||||||
future_kernel.wait();
|
future_kernel.wait();
|
||||||
}
|
}
|
||||||
@ -195,11 +209,15 @@ template <class T>
|
|||||||
void HCStream<T>::triad()
|
void HCStream<T>::triad()
|
||||||
{
|
{
|
||||||
const T scalar = 0.3;
|
const T scalar = 0.3;
|
||||||
|
hc::array<T,1>& device_a = this->d_a;
|
||||||
|
hc::array<T,1>& device_b = this->d_b;
|
||||||
|
hc::array<T,1>& device_c = this->d_c;
|
||||||
|
|
||||||
try{
|
try{
|
||||||
// launch a GPU kernel to compute the saxpy in parallel
|
// launch a GPU kernel to compute the saxpy in parallel
|
||||||
hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
|
hc::completion_future future_kernel = hc::parallel_for_each(hc::extent<1>(array_size)
|
||||||
, [&](hc::index<1> i) [[hc]] {
|
, [&](hc::index<1> i) [[hc]] {
|
||||||
d_a[i] = d_b[i] + scalar*d_c[i];
|
device_a[i] = device_b[i] + scalar*device_c[i];
|
||||||
});
|
});
|
||||||
future_kernel.wait();
|
future_kernel.wait();
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user