Merge branch 'main' into rust
# Conflicts: # README.md
This commit is contained in:
commit
2ff883f2f7
98
.github/workflows/main.yaml
vendored
Normal file
98
.github/workflows/main.yaml
vendored
Normal file
@ -0,0 +1,98 @@
|
||||
name: CI
|
||||
on: [push, pull_request]
|
||||
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Cache compiler
|
||||
id: prepare-compilers
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: compilers
|
||||
key: ${{ runner.os }}-${{ hashFiles('ci-prepare-bionic.sh') }}
|
||||
|
||||
- name: Prepare compilers
|
||||
if: steps.prepare-compilers.outputs.cache-hit != 'true'
|
||||
run: source ./ci-prepare-bionic.sh ./compilers SETUP true || true
|
||||
|
||||
- name: Setup test environment
|
||||
run: source ./ci-prepare-bionic.sh ./compilers VARS false || true
|
||||
|
||||
- name: Test compile gcc @ CMake 3.13
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_13_BIN }}
|
||||
- name: Test compile clang @ CMake 3.13
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_13_BIN }}
|
||||
- name: Test compile nvhpc @ CMake 3.13
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_13_BIN }}
|
||||
- name: Test compile aocc @ CMake 3.13
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_13_BIN }}
|
||||
- name: Test compile aomp @ CMake 3.13
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_13_BIN }}
|
||||
- name: Test compile hip @ CMake 3.13
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_13_BIN }}
|
||||
- name: Test compile dpcpp @ CMake 3.13
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_13_BIN }}
|
||||
- name: Test compile hipsycl @ CMake 3.13
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_13_BIN }}
|
||||
|
||||
- name: Test compile gcc @ CMake 3.15
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_15_BIN }}
|
||||
- name: Test compile clang @ CMake 3.15
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_15_BIN }}
|
||||
- name: Test compile nvhpc @ CMake 3.15
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_15_BIN }}
|
||||
- name: Test compile aocc @ CMake 3.15
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_15_BIN }}
|
||||
- name: Test compile aomp @ CMake 3.15
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_15_BIN }}
|
||||
- name: Test compile hip @ CMake 3.15
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_15_BIN }}
|
||||
- name: Test compile dpcpp @ CMake 3.15
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_15_BIN }}
|
||||
- name: Test compile hipsycl @ CMake 3.15
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_15_BIN }}
|
||||
|
||||
- name: Test compile gcc @ CMake 3.18
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_18_BIN }}
|
||||
- name: Test compile clang @ CMake 3.18
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_18_BIN }}
|
||||
- name: Test compile nvhpc @ CMake 3.18
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_18_BIN }}
|
||||
- name: Test compile aocc @ CMake 3.18
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_18_BIN }}
|
||||
- name: Test compile aomp @ CMake 3.18
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_18_BIN }}
|
||||
- name: Test compile hip @ CMake 3.18
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_18_BIN }}
|
||||
- name: Test compile dpcpp @ CMake 3.18
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_18_BIN }}
|
||||
- name: Test compile hipsycl @ CMake 3.18
|
||||
if: ${{ ! cancelled() }}
|
||||
run: ./ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_18_BIN }}
|
||||
7
.gitignore
vendored
7
.gitignore
vendored
@ -21,3 +21,10 @@ KokkosCore_config.*
|
||||
.DS_Store
|
||||
|
||||
Makefile
|
||||
|
||||
build/
|
||||
cmake-build-*/
|
||||
CMakeFiles/
|
||||
.idea/
|
||||
.vscode/
|
||||
.directory
|
||||
78
ACC.cmake
Normal file
78
ACC.cmake
Normal file
@ -0,0 +1,78 @@
|
||||
|
||||
register_flag_optional(CMAKE_CXX_COMPILER
|
||||
"Any CXX compiler that supports OpenACC as per CMake detection"
|
||||
"c++")
|
||||
|
||||
register_flag_optional(TARGET_DEVICE
|
||||
"[PGI/NVHPC only] This sets the `-target` flag, possible values are:
|
||||
gpu - Globally set the target device to an NVIDIA GPU
|
||||
multicore - Globally set the target device to the host CPU
|
||||
Refer to `nvc++ --help` for the full list"
|
||||
"")
|
||||
|
||||
|
||||
register_flag_optional(CUDA_ARCH
|
||||
"[PGI/NVHPC only] Only applicable if `TARGET_DEVICE` is set to `gpu`.
|
||||
Nvidia architecture in ccXY format, for example, sm_70 becomes cc70, will be passed in via `-gpu=` (e.g `cc70`)
|
||||
Possible values are:
|
||||
cc35 - Compile for compute capability 3.5
|
||||
cc50 - Compile for compute capability 5.0
|
||||
cc60 - Compile for compute capability 6.0
|
||||
cc62 - Compile for compute capability 6.2
|
||||
cc70 - Compile for compute capability 7.0
|
||||
cc72 - Compile for compute capability 7.2
|
||||
cc75 - Compile for compute capability 7.5
|
||||
cc80 - Compile for compute capability 8.0
|
||||
ccall - Compile for all supported compute capabilities
|
||||
Refer to `nvc++ --help` for the full list"
|
||||
"")
|
||||
|
||||
register_flag_optional(TARGET_PROCESSOR
|
||||
"[PGI/NVHPC only] This sets the `-tp` (target processor) flag, possible values are:
|
||||
px - Generic x86 Processor
|
||||
bulldozer - AMD Bulldozer processor
|
||||
piledriver - AMD Piledriver processor
|
||||
zen - AMD Zen architecture (Epyc, Ryzen)
|
||||
zen2 - AMD Zen 2 architecture (Ryzen 2)
|
||||
sandybridge - Intel SandyBridge processor
|
||||
haswell - Intel Haswell processor
|
||||
knl - Intel Knights Landing processor
|
||||
skylake - Intel Skylake Xeon processor
|
||||
host - Link native version of HPC SDK cpu math library
|
||||
native - Alias for -tp host
|
||||
Refer to `nvc++ --help` for the full list"
|
||||
"")
|
||||
|
||||
macro(setup)
|
||||
find_package(OpenACC REQUIRED)
|
||||
|
||||
if(${CMAKE_VERSION} VERSION_LESS "3.16.0")
|
||||
# CMake didn't really implement ACC as a target before 3.16, so we append them manually
|
||||
separate_arguments(OpenACC_CXX_FLAGS)
|
||||
register_append_cxx_flags(ANY ${OpenACC_CXX_FLAGS})
|
||||
register_append_link_flags(${OpenACC_CXX_FLAGS})
|
||||
else()
|
||||
register_link_library(OpenACC::OpenACC_CXX)
|
||||
endif()
|
||||
|
||||
|
||||
register_definitions(restrict=__restrict)
|
||||
# XXX NVHPC is really new so older Cmake thinks it's PGI, which is true
|
||||
if ((CMAKE_CXX_COMPILER_ID STREQUAL PGI) OR (CMAKE_CXX_COMPILER_ID STREQUAL NVHPC))
|
||||
|
||||
if (TARGET_DEVICE)
|
||||
register_append_cxx_flags(ANY -target=${TARGET_DEVICE})
|
||||
endif ()
|
||||
|
||||
if (CUDA_ARCH)
|
||||
register_append_cxx_flags(ANY -gpu=${CUDA_ARCH})
|
||||
endif ()
|
||||
|
||||
if (TARGET_PROCESSOR)
|
||||
register_append_cxx_flags(ANY -tp=${TARGET_PROCESSOR})
|
||||
endif ()
|
||||
|
||||
endif ()
|
||||
|
||||
endmacro()
|
||||
|
||||
@ -16,9 +16,14 @@ ACCStream<T>::ACCStream(const int ARRAY_SIZE, int device)
|
||||
array_size = ARRAY_SIZE;
|
||||
|
||||
// Set up data region on device
|
||||
a = new T[array_size];
|
||||
b = new T[array_size];
|
||||
c = new T[array_size];
|
||||
this->a = new T[array_size];
|
||||
this->b = new T[array_size];
|
||||
this->c = new T[array_size];
|
||||
|
||||
T * restrict a = this->a;
|
||||
T * restrict b = this->b;
|
||||
T * restrict c = this->c;
|
||||
|
||||
#pragma acc enter data create(a[0:array_size], b[0:array_size], c[0:array_size])
|
||||
{}
|
||||
}
|
||||
@ -28,6 +33,11 @@ ACCStream<T>::~ACCStream()
|
||||
{
|
||||
// End data region on device
|
||||
int array_size = this->array_size;
|
||||
|
||||
T * restrict a = this->a;
|
||||
T * restrict b = this->b;
|
||||
T * restrict c = this->c;
|
||||
|
||||
#pragma acc exit data delete(a[0:array_size], b[0:array_size], c[0:array_size])
|
||||
{}
|
||||
|
||||
|
||||
@ -19,9 +19,17 @@
|
||||
template <class T>
|
||||
class ACCStream : public Stream<T>
|
||||
{
|
||||
|
||||
struct A{
|
||||
T *a;
|
||||
T *b;
|
||||
T *c;
|
||||
};
|
||||
|
||||
protected:
|
||||
// Size of arrays
|
||||
int array_size;
|
||||
A aa;
|
||||
// Device side pointers
|
||||
T *a;
|
||||
T *b;
|
||||
|
||||
@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file.
|
||||
- SYCL build rules for ComputeCpp, DPCPP and HipSYCL.
|
||||
- Support for CUDA Managed Memory and Page Fault memory.
|
||||
- Added nstream kernel from PRK with associate command line option.
|
||||
- CMake build system added for all models.
|
||||
|
||||
### Changed
|
||||
- Default branch renamed from `master` to `main`.
|
||||
@ -27,6 +28,7 @@ All notable changes to this project will be documented in this file.
|
||||
- Ensure all OpenCL kernels are present in destructor.
|
||||
- Unified run function in driver code to reduce code duplication, output should be uneffected.
|
||||
- Normalise sum result by expected value to help false negative errors.
|
||||
- HC version deprecated and moved to a legacy directory.
|
||||
|
||||
### Removed
|
||||
- Pre-building of kernels in SYCL version to ensure compatibility with SYCL 1.2.1.
|
||||
|
||||
117
CL/cl_d3d10.h
Normal file
117
CL/cl_d3d10.h
Normal file
@ -0,0 +1,117 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_D3D10_H
|
||||
#define __OPENCL_CL_D3D10_H
|
||||
|
||||
#include <d3d10.h>
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************
|
||||
* cl_khr_d3d10_sharing */
|
||||
#define cl_khr_d3d10_sharing 1
|
||||
|
||||
typedef cl_uint cl_d3d10_device_source_khr;
|
||||
typedef cl_uint cl_d3d10_device_set_khr;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_D3D10_DEVICE_KHR -1002
|
||||
#define CL_INVALID_D3D10_RESOURCE_KHR -1003
|
||||
#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
|
||||
#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
|
||||
|
||||
/* cl_d3d10_device_source_nv */
|
||||
#define CL_D3D10_DEVICE_KHR 0x4010
|
||||
#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
|
||||
|
||||
/* cl_d3d10_device_set_nv */
|
||||
#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
|
||||
#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
|
||||
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
|
||||
#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_d3d10_device_source_khr d3d_device_source,
|
||||
void * d3d_object,
|
||||
cl_d3d10_device_set_khr d3d_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Buffer * resource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Texture2D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D10Texture3D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_D3D10_H */
|
||||
|
||||
117
CL/cl_d3d11.h
Normal file
117
CL/cl_d3d11.h
Normal file
@ -0,0 +1,117 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_D3D11_H
|
||||
#define __OPENCL_CL_D3D11_H
|
||||
|
||||
#include <d3d11.h>
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************
|
||||
* cl_khr_d3d11_sharing */
|
||||
#define cl_khr_d3d11_sharing 1
|
||||
|
||||
typedef cl_uint cl_d3d11_device_source_khr;
|
||||
typedef cl_uint cl_d3d11_device_set_khr;
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_D3D11_DEVICE_KHR -1006
|
||||
#define CL_INVALID_D3D11_RESOURCE_KHR -1007
|
||||
#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008
|
||||
#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009
|
||||
|
||||
/* cl_d3d11_device_source */
|
||||
#define CL_D3D11_DEVICE_KHR 0x4019
|
||||
#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A
|
||||
|
||||
/* cl_d3d11_device_set */
|
||||
#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B
|
||||
#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D
|
||||
#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_D3D11_RESOURCE_KHR 0x401E
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020
|
||||
#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_d3d11_device_source_khr d3d_device_source,
|
||||
void * d3d_object,
|
||||
cl_d3d11_device_set_khr d3d_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Buffer * resource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Texture2D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
ID3D11Texture3D * resource,
|
||||
UINT subresource,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_D3D11_H */
|
||||
|
||||
118
CL/cl_dx9_media_sharing.h
Normal file
118
CL/cl_dx9_media_sharing.h
Normal file
@ -0,0 +1,118 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H
|
||||
#define __OPENCL_CL_DX9_MEDIA_SHARING_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
/* cl_khr_dx9_media_sharing */
|
||||
#define cl_khr_dx9_media_sharing 1
|
||||
|
||||
typedef cl_uint cl_dx9_media_adapter_type_khr;
|
||||
typedef cl_uint cl_dx9_media_adapter_set_khr;
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <d3d9.h>
|
||||
typedef struct _cl_dx9_surface_info_khr
|
||||
{
|
||||
IDirect3DSurface9 *resource;
|
||||
HANDLE shared_handle;
|
||||
} cl_dx9_surface_info_khr;
|
||||
#endif
|
||||
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
/* Error Codes */
|
||||
#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010
|
||||
#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011
|
||||
#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012
|
||||
#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013
|
||||
|
||||
/* cl_media_adapter_type_khr */
|
||||
#define CL_ADAPTER_D3D9_KHR 0x2020
|
||||
#define CL_ADAPTER_D3D9EX_KHR 0x2021
|
||||
#define CL_ADAPTER_DXVA_KHR 0x2022
|
||||
|
||||
/* cl_media_adapter_set_khr */
|
||||
#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023
|
||||
#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025
|
||||
#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026
|
||||
#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028
|
||||
#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B
|
||||
#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_uint num_media_adapters,
|
||||
cl_dx9_media_adapter_type_khr * media_adapter_type,
|
||||
void * media_adapters,
|
||||
cl_dx9_media_adapter_set_khr media_adapter_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * devices,
|
||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_dx9_media_adapter_type_khr adapter_type,
|
||||
void * surface_info,
|
||||
cl_uint plane,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */
|
||||
|
||||
170
CL/cl_dx9_media_sharing_intel.h
Normal file
170
CL/cl_dx9_media_sharing_intel.h
Normal file
@ -0,0 +1,170 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
/*****************************************************************************\
|
||||
|
||||
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
|
||||
|
||||
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
|
||||
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
File Name: cl_dx9_media_sharing_intel.h
|
||||
|
||||
Abstract:
|
||||
|
||||
Notes:
|
||||
|
||||
\*****************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
|
||||
#define __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
#include <d3d9.h>
|
||||
#include <dxvahd.h>
|
||||
#include <wtypes.h>
|
||||
#include <d3d9types.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/***************************************
|
||||
* cl_intel_dx9_media_sharing extension *
|
||||
****************************************/
|
||||
|
||||
#define cl_intel_dx9_media_sharing 1
|
||||
|
||||
typedef cl_uint cl_dx9_device_source_intel;
|
||||
typedef cl_uint cl_dx9_device_set_intel;
|
||||
|
||||
/* error codes */
|
||||
#define CL_INVALID_DX9_DEVICE_INTEL -1010
|
||||
#define CL_INVALID_DX9_RESOURCE_INTEL -1011
|
||||
#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012
|
||||
#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013
|
||||
|
||||
/* cl_dx9_device_source_intel */
|
||||
#define CL_D3D9_DEVICE_INTEL 0x4022
|
||||
#define CL_D3D9EX_DEVICE_INTEL 0x4070
|
||||
#define CL_DXVA_DEVICE_INTEL 0x4071
|
||||
|
||||
/* cl_dx9_device_set_intel */
|
||||
#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024
|
||||
#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026
|
||||
#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072
|
||||
#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_DX9_RESOURCE_INTEL 0x4027
|
||||
#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_DX9_PLANE_INTEL 0x4075
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A
|
||||
#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B
|
||||
/******************************************************************************/
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetDeviceIDsFromDX9INTEL(
|
||||
cl_platform_id platform,
|
||||
cl_dx9_device_source_intel dx9_device_source,
|
||||
void* dx9_object,
|
||||
cl_dx9_device_set_intel dx9_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_dx9_device_source_intel dx9_device_source,
|
||||
void* dx9_object,
|
||||
cl_dx9_device_set_intel dx9_device_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromDX9MediaSurfaceINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
IDirect3DSurface9* resource,
|
||||
HANDLE sharedHandle,
|
||||
UINT plane,
|
||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
IDirect3DSurface9* resource,
|
||||
HANDLE sharedHandle,
|
||||
UINT plane,
|
||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireDX9ObjectsINTEL(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseDX9ObjectsINTEL(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H */
|
||||
|
||||
120
CL/cl_egl.h
Normal file
120
CL/cl_egl.h
Normal file
@ -0,0 +1,120 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_EGL_H
|
||||
#define __OPENCL_CL_EGL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
|
||||
#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F
|
||||
#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D
|
||||
#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E
|
||||
|
||||
/* Error type for clCreateFromEGLImageKHR */
|
||||
#define CL_INVALID_EGL_OBJECT_KHR -1093
|
||||
#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092
|
||||
|
||||
/* CLeglImageKHR is an opaque handle to an EGLImage */
|
||||
typedef void* CLeglImageKHR;
|
||||
|
||||
/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
|
||||
typedef void* CLeglDisplayKHR;
|
||||
|
||||
/* CLeglSyncKHR is an opaque handle to an EGLSync object */
|
||||
typedef void* CLeglSyncKHR;
|
||||
|
||||
/* properties passed to clCreateFromEGLImageKHR */
|
||||
typedef intptr_t cl_egl_image_properties_khr;
|
||||
|
||||
|
||||
#define cl_khr_egl_image 1
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromEGLImageKHR(cl_context context,
|
||||
CLeglDisplayKHR egldisplay,
|
||||
CLeglImageKHR eglimage,
|
||||
cl_mem_flags flags,
|
||||
const cl_egl_image_properties_khr * properties,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
|
||||
cl_context context,
|
||||
CLeglDisplayKHR egldisplay,
|
||||
CLeglImageKHR eglimage,
|
||||
cl_mem_flags flags,
|
||||
const cl_egl_image_properties_khr * properties,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
|
||||
#define cl_khr_egl_event 1
|
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||
clCreateEventFromEGLSyncKHR(cl_context context,
|
||||
CLeglSyncKHR sync,
|
||||
CLeglDisplayKHR display,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
|
||||
cl_context context,
|
||||
CLeglSyncKHR sync,
|
||||
CLeglDisplayKHR display,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_EGL_H */
|
||||
841
CL/cl_ext.h
Normal file
841
CL/cl_ext.h
Normal file
@ -0,0 +1,841 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
/* cl_ext.h contains OpenCL extensions which don't have external */
|
||||
/* (OpenGL, D3D) dependencies. */
|
||||
|
||||
#ifndef __CL_EXT_H
|
||||
#define __CL_EXT_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
/* cl_khr_fp64 extension - no extension #define since it has no functions */
|
||||
/* CL_DEVICE_DOUBLE_FP_CONFIG is defined in CL.h for OpenCL >= 120 */
|
||||
|
||||
#if CL_TARGET_OPENCL_VERSION <= 110
|
||||
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
|
||||
#endif
|
||||
|
||||
/* cl_khr_fp16 extension - no extension #define since it has no functions */
|
||||
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
|
||||
|
||||
/* Memory object destruction
|
||||
*
|
||||
* Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
|
||||
*
|
||||
* Registers a user callback function that will be called when the memory object is deleted and its resources
|
||||
* freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
|
||||
* stack associated with memobj. The registered user callback functions are called in the reverse order in
|
||||
* which they were registered. The user callback functions are called and then the memory object is deleted
|
||||
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
|
||||
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
|
||||
* the storage bits for the memory object, can be reused or freed.
|
||||
*
|
||||
* The application may not call CL api's with the cl_mem object passed to the pfn_notify.
|
||||
*
|
||||
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
|
||||
* before using.
|
||||
*/
|
||||
#define cl_APPLE_SetMemObjectDestructor 1
|
||||
cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem memobj,
|
||||
void (* pfn_notify)(cl_mem memobj, void * user_data),
|
||||
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/* Context Logging Functions
|
||||
*
|
||||
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
|
||||
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
|
||||
* before using.
|
||||
*
|
||||
* clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger
|
||||
*/
|
||||
#define cl_APPLE_ContextLoggingFunctions 1
|
||||
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * errstr,
|
||||
const void * private_info,
|
||||
size_t cb,
|
||||
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
|
||||
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * errstr,
|
||||
const void * private_info,
|
||||
size_t cb,
|
||||
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
|
||||
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * errstr,
|
||||
const void * private_info,
|
||||
size_t cb,
|
||||
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/************************
|
||||
* cl_khr_icd extension *
|
||||
************************/
|
||||
#define cl_khr_icd 1
|
||||
|
||||
/* cl_platform_info */
|
||||
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
|
||||
|
||||
/* Additional Error Codes */
|
||||
#define CL_PLATFORM_NOT_FOUND_KHR -1001
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clIcdGetPlatformIDsKHR(cl_uint num_entries,
|
||||
cl_platform_id * platforms,
|
||||
cl_uint * num_platforms);
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(cl_uint num_entries,
|
||||
cl_platform_id * platforms,
|
||||
cl_uint * num_platforms);
|
||||
|
||||
|
||||
/*******************************
|
||||
* cl_khr_il_program extension *
|
||||
*******************************/
|
||||
#define cl_khr_il_program 1
|
||||
|
||||
/* New property to clGetDeviceInfo for retrieving supported intermediate
|
||||
* languages
|
||||
*/
|
||||
#define CL_DEVICE_IL_VERSION_KHR 0x105B
|
||||
|
||||
/* New property to clGetProgramInfo for retrieving for retrieving the IL of a
|
||||
* program
|
||||
*/
|
||||
#define CL_PROGRAM_IL_KHR 0x1169
|
||||
|
||||
extern CL_API_ENTRY cl_program CL_API_CALL
|
||||
clCreateProgramWithILKHR(cl_context context,
|
||||
const void * il,
|
||||
size_t length,
|
||||
cl_int * errcode_ret);
|
||||
|
||||
typedef CL_API_ENTRY cl_program
|
||||
(CL_API_CALL *clCreateProgramWithILKHR_fn)(cl_context context,
|
||||
const void * il,
|
||||
size_t length,
|
||||
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
/* Extension: cl_khr_image2d_from_buffer
|
||||
*
|
||||
* This extension allows a 2D image to be created from a cl_mem buffer without
|
||||
* a copy. The type associated with a 2D image created from a buffer in an
|
||||
* OpenCL program is image2d_t. Both the sampler and sampler-less read_image
|
||||
* built-in functions are supported for 2D images and 2D images created from
|
||||
* a buffer. Similarly, the write_image built-ins are also supported for 2D
|
||||
* images created from a buffer.
|
||||
*
|
||||
* When the 2D image from buffer is created, the client must specify the
|
||||
* width, height, image format (i.e. channel order and channel data type)
|
||||
* and optionally the row pitch.
|
||||
*
|
||||
* The pitch specified must be a multiple of
|
||||
* CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR pixels.
|
||||
* The base address of the buffer must be aligned to
|
||||
* CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR pixels.
|
||||
*/
|
||||
|
||||
#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR 0x104A
|
||||
#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR 0x104B
|
||||
|
||||
|
||||
/**************************************
|
||||
* cl_khr_initialize_memory extension *
|
||||
**************************************/
|
||||
|
||||
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030
|
||||
|
||||
|
||||
/**************************************
|
||||
* cl_khr_terminate_context extension *
|
||||
**************************************/
|
||||
|
||||
#define CL_CONTEXT_TERMINATED_KHR -1121
|
||||
|
||||
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031
|
||||
#define CL_CONTEXT_TERMINATE_KHR 0x2032
|
||||
|
||||
#define cl_khr_terminate_context 1
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clTerminateContextKHR(cl_context context) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
|
||||
/*
|
||||
* Extension: cl_khr_spir
|
||||
*
|
||||
* This extension adds support to create an OpenCL program object from a
|
||||
* Standard Portable Intermediate Representation (SPIR) instance
|
||||
*/
|
||||
|
||||
#define CL_DEVICE_SPIR_VERSIONS 0x40E0
|
||||
#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1
|
||||
|
||||
|
||||
/*****************************************
|
||||
* cl_khr_create_command_queue extension *
|
||||
*****************************************/
|
||||
#define cl_khr_create_command_queue 1
|
||||
|
||||
typedef cl_bitfield cl_queue_properties_khr;
|
||||
|
||||
extern CL_API_ENTRY cl_command_queue CL_API_CALL
|
||||
clCreateCommandQueueWithPropertiesKHR(cl_context context,
|
||||
cl_device_id device,
|
||||
const cl_queue_properties_khr* properties,
|
||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_command_queue
|
||||
(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(cl_context context,
|
||||
cl_device_id device,
|
||||
const cl_queue_properties_khr* properties,
|
||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
|
||||
/******************************************
|
||||
* cl_nv_device_attribute_query extension *
|
||||
******************************************/
|
||||
|
||||
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
|
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
|
||||
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
|
||||
#define CL_DEVICE_WARP_SIZE_NV 0x4003
|
||||
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
|
||||
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
|
||||
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_amd_device_attribute_query *
|
||||
*********************************/
|
||||
|
||||
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
|
||||
#define CL_DEVICE_TOPOLOGY_AMD 0x4037
|
||||
#define CL_DEVICE_BOARD_NAME_AMD 0x4038
|
||||
#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039
|
||||
#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040
|
||||
#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041
|
||||
#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042
|
||||
#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043
|
||||
#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044
|
||||
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045
|
||||
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046
|
||||
#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047
|
||||
#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048
|
||||
#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049
|
||||
#define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A
|
||||
#define CL_DEVICE_GFXIP_MINOR_AMD 0x404B
|
||||
#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C
|
||||
#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD 0x4030
|
||||
#define CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD 0x4031
|
||||
#define CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD 0x4033
|
||||
#define CL_DEVICE_PCIE_ID_AMD 0x4034
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_arm_printf extension
|
||||
*********************************/
|
||||
|
||||
#define CL_PRINTF_CALLBACK_ARM 0x40B0
|
||||
#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1
|
||||
|
||||
|
||||
/***********************************
|
||||
* cl_ext_device_fission extension
|
||||
***********************************/
|
||||
#define cl_ext_device_fission 1
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clReleaseDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clRetainDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef cl_ulong cl_device_partition_property_ext;
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clCreateSubDevicesEXT(cl_device_id in_device,
|
||||
const cl_device_partition_property_ext * properties,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * out_devices,
|
||||
cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id in_device,
|
||||
const cl_device_partition_property_ext * properties,
|
||||
cl_uint num_entries,
|
||||
cl_device_id * out_devices,
|
||||
cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
/* cl_device_partition_property_ext */
|
||||
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
|
||||
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
|
||||
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
|
||||
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
|
||||
|
||||
/* clDeviceGetInfo selectors */
|
||||
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
|
||||
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
|
||||
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
|
||||
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
|
||||
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
|
||||
|
||||
/* error codes */
|
||||
#define CL_DEVICE_PARTITION_FAILED_EXT -1057
|
||||
#define CL_INVALID_PARTITION_COUNT_EXT -1058
|
||||
#define CL_INVALID_PARTITION_NAME_EXT -1059
|
||||
|
||||
/* CL_AFFINITY_DOMAINs */
|
||||
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
|
||||
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
|
||||
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
|
||||
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
|
||||
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
|
||||
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
|
||||
|
||||
/* cl_device_partition_property_ext list terminators */
|
||||
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
|
||||
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
|
||||
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
|
||||
|
||||
|
||||
/***********************************
|
||||
* cl_ext_migrate_memobject extension definitions
|
||||
***********************************/
|
||||
#define cl_ext_migrate_memobject 1
|
||||
|
||||
typedef cl_bitfield cl_mem_migration_flags_ext;
|
||||
|
||||
#define CL_MIGRATE_MEM_OBJECT_HOST_EXT 0x1
|
||||
|
||||
#define CL_COMMAND_MIGRATE_MEM_OBJECT_EXT 0x4040
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueMigrateMemObjectEXT(cl_command_queue command_queue,
|
||||
cl_uint num_mem_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_mem_migration_flags_ext flags,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(cl_command_queue command_queue,
|
||||
cl_uint num_mem_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_mem_migration_flags_ext flags,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event);
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_ext_host_ptr extension
|
||||
*********************************/
|
||||
#define cl_qcom_ext_host_ptr 1
|
||||
|
||||
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)
|
||||
|
||||
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
|
||||
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
|
||||
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
|
||||
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
|
||||
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
|
||||
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
|
||||
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
|
||||
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7
|
||||
|
||||
typedef cl_uint cl_image_pitch_info_qcom;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetDeviceImageInfoQCOM(cl_device_id device,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
const cl_image_format *image_format,
|
||||
cl_image_pitch_info_qcom param_name,
|
||||
size_t param_value_size,
|
||||
void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
typedef struct _cl_mem_ext_host_ptr
|
||||
{
|
||||
/* Type of external memory allocation. */
|
||||
/* Legal values will be defined in layered extensions. */
|
||||
cl_uint allocation_type;
|
||||
|
||||
/* Host cache policy for this external memory allocation. */
|
||||
cl_uint host_cache_policy;
|
||||
|
||||
} cl_mem_ext_host_ptr;
|
||||
|
||||
|
||||
/*******************************************
|
||||
* cl_qcom_ext_host_ptr_iocoherent extension
|
||||
********************************************/
|
||||
|
||||
/* Cache policy specifying io-coherence */
|
||||
#define CL_MEM_HOST_IOCOHERENT_QCOM 0x40A9
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_ion_host_ptr extension
|
||||
*********************************/
|
||||
|
||||
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8
|
||||
|
||||
typedef struct _cl_mem_ion_host_ptr
|
||||
{
|
||||
/* Type of external memory allocation. */
|
||||
/* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
|
||||
cl_mem_ext_host_ptr ext_host_ptr;
|
||||
|
||||
/* ION file descriptor */
|
||||
int ion_filedesc;
|
||||
|
||||
/* Host pointer to the ION allocated memory */
|
||||
void* ion_hostptr;
|
||||
|
||||
} cl_mem_ion_host_ptr;
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_qcom_android_native_buffer_host_ptr extension
|
||||
*********************************/
|
||||
|
||||
#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6
|
||||
|
||||
typedef struct _cl_mem_android_native_buffer_host_ptr
|
||||
{
|
||||
/* Type of external memory allocation. */
|
||||
/* Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers. */
|
||||
cl_mem_ext_host_ptr ext_host_ptr;
|
||||
|
||||
/* Virtual pointer to the android native buffer */
|
||||
void* anb_ptr;
|
||||
|
||||
} cl_mem_android_native_buffer_host_ptr;
|
||||
|
||||
|
||||
/******************************************
|
||||
* cl_img_yuv_image extension *
|
||||
******************************************/
|
||||
|
||||
/* Image formats used in clCreateImage */
|
||||
#define CL_NV21_IMG 0x40D0
|
||||
#define CL_YV12_IMG 0x40D1
|
||||
|
||||
|
||||
/******************************************
|
||||
* cl_img_cached_allocations extension *
|
||||
******************************************/
|
||||
|
||||
/* Flag values used by clCreateBuffer */
|
||||
#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG (1 << 26)
|
||||
#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG (1 << 27)
|
||||
|
||||
|
||||
/******************************************
|
||||
* cl_img_use_gralloc_ptr extension *
|
||||
******************************************/
|
||||
#define cl_img_use_gralloc_ptr 1
|
||||
|
||||
/* Flag values used by clCreateBuffer */
|
||||
#define CL_MEM_USE_GRALLOC_PTR_IMG (1 << 28)
|
||||
|
||||
/* To be used by clGetEventInfo: */
|
||||
#define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG 0x40D2
|
||||
#define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG 0x40D3
|
||||
|
||||
/* Error code from clEnqueueReleaseGrallocObjectsIMG */
|
||||
#define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG 0x40D4
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireGrallocObjectsIMG(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseGrallocObjectsIMG(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_khr_subgroups extension
|
||||
*********************************/
|
||||
#define cl_khr_subgroups 1
|
||||
|
||||
#if !defined(CL_VERSION_2_1)
|
||||
/* For OpenCL 2.1 and newer, cl_kernel_sub_group_info is declared in CL.h.
|
||||
In hindsight, there should have been a khr suffix on this type for
|
||||
the extension, but keeping it un-suffixed to maintain backwards
|
||||
compatibility. */
|
||||
typedef cl_uint cl_kernel_sub_group_info;
|
||||
#endif
|
||||
|
||||
/* cl_kernel_sub_group_info */
|
||||
#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033
|
||||
#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetKernelSubGroupInfoKHR(cl_kernel in_kernel,
|
||||
cl_device_id in_device,
|
||||
cl_kernel_sub_group_info param_name,
|
||||
size_t input_value_size,
|
||||
const void * input_value,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel in_kernel,
|
||||
cl_device_id in_device,
|
||||
cl_kernel_sub_group_info param_name,
|
||||
size_t input_value_size,
|
||||
const void * input_value,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_khr_mipmap_image extension
|
||||
*********************************/
|
||||
|
||||
/* cl_sampler_properties */
|
||||
#define CL_SAMPLER_MIP_FILTER_MODE_KHR 0x1155
|
||||
#define CL_SAMPLER_LOD_MIN_KHR 0x1156
|
||||
#define CL_SAMPLER_LOD_MAX_KHR 0x1157
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_khr_priority_hints extension
|
||||
*********************************/
|
||||
/* This extension define is for backwards compatibility.
|
||||
It shouldn't be required since this extension has no new functions. */
|
||||
#define cl_khr_priority_hints 1
|
||||
|
||||
typedef cl_uint cl_queue_priority_khr;
|
||||
|
||||
/* cl_command_queue_properties */
|
||||
#define CL_QUEUE_PRIORITY_KHR 0x1096
|
||||
|
||||
/* cl_queue_priority_khr */
|
||||
#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0)
|
||||
#define CL_QUEUE_PRIORITY_MED_KHR (1<<1)
|
||||
#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2)
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_khr_throttle_hints extension
|
||||
*********************************/
|
||||
/* This extension define is for backwards compatibility.
|
||||
It shouldn't be required since this extension has no new functions. */
|
||||
#define cl_khr_throttle_hints 1
|
||||
|
||||
typedef cl_uint cl_queue_throttle_khr;
|
||||
|
||||
/* cl_command_queue_properties */
|
||||
#define CL_QUEUE_THROTTLE_KHR 0x1097
|
||||
|
||||
/* cl_queue_throttle_khr */
|
||||
#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0)
|
||||
#define CL_QUEUE_THROTTLE_MED_KHR (1<<1)
|
||||
#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2)
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_khr_subgroup_named_barrier
|
||||
*********************************/
|
||||
/* This extension define is for backwards compatibility.
|
||||
It shouldn't be required since this extension has no new functions. */
|
||||
#define cl_khr_subgroup_named_barrier 1
|
||||
|
||||
/* cl_device_info */
|
||||
#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR 0x2035
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_khr_extended_versioning
|
||||
*********************************/
|
||||
|
||||
#define cl_khr_extended_versioning 1
|
||||
|
||||
#define CL_VERSION_MAJOR_BITS_KHR (10)
|
||||
#define CL_VERSION_MINOR_BITS_KHR (10)
|
||||
#define CL_VERSION_PATCH_BITS_KHR (12)
|
||||
|
||||
#define CL_VERSION_MAJOR_MASK_KHR ((1 << CL_VERSION_MAJOR_BITS_KHR) - 1)
|
||||
#define CL_VERSION_MINOR_MASK_KHR ((1 << CL_VERSION_MINOR_BITS_KHR) - 1)
|
||||
#define CL_VERSION_PATCH_MASK_KHR ((1 << CL_VERSION_PATCH_BITS_KHR) - 1)
|
||||
|
||||
#define CL_VERSION_MAJOR_KHR(version) ((version) >> (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR))
|
||||
#define CL_VERSION_MINOR_KHR(version) (((version) >> CL_VERSION_PATCH_BITS_KHR) & CL_VERSION_MINOR_MASK_KHR)
|
||||
#define CL_VERSION_PATCH_KHR(version) ((version) & CL_VERSION_PATCH_MASK_KHR)
|
||||
|
||||
#define CL_MAKE_VERSION_KHR(major, minor, patch) \
|
||||
((((major) & CL_VERSION_MAJOR_MASK_KHR) << (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) | \
|
||||
(((minor) & CL_VERSION_MINOR_MASK_KHR) << CL_VERSION_PATCH_BITS_KHR) | \
|
||||
((patch) & CL_VERSION_PATCH_MASK_KHR))
|
||||
|
||||
typedef cl_uint cl_version_khr;
|
||||
|
||||
#define CL_NAME_VERSION_MAX_NAME_SIZE_KHR 64
|
||||
|
||||
typedef struct _cl_name_version_khr
|
||||
{
|
||||
cl_version_khr version;
|
||||
char name[CL_NAME_VERSION_MAX_NAME_SIZE_KHR];
|
||||
} cl_name_version_khr;
|
||||
|
||||
/* cl_platform_info */
|
||||
#define CL_PLATFORM_NUMERIC_VERSION_KHR 0x0906
|
||||
#define CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR 0x0907
|
||||
|
||||
/* cl_device_info */
|
||||
#define CL_DEVICE_NUMERIC_VERSION_KHR 0x105E
|
||||
#define CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR 0x105F
|
||||
#define CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR 0x1060
|
||||
#define CL_DEVICE_ILS_WITH_VERSION_KHR 0x1061
|
||||
#define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR 0x1062
|
||||
|
||||
|
||||
/*********************************
|
||||
* cl_khr_device_uuid extension
|
||||
*********************************/
|
||||
#define cl_khr_device_uuid 1
|
||||
|
||||
#define CL_UUID_SIZE_KHR 16
|
||||
#define CL_LUID_SIZE_KHR 8
|
||||
|
||||
#define CL_DEVICE_UUID_KHR 0x106A
|
||||
#define CL_DRIVER_UUID_KHR 0x106B
|
||||
#define CL_DEVICE_LUID_VALID_KHR 0x106C
|
||||
#define CL_DEVICE_LUID_KHR 0x106D
|
||||
#define CL_DEVICE_NODE_MASK_KHR 0x106E
|
||||
|
||||
|
||||
/**********************************
|
||||
* cl_arm_import_memory extension *
|
||||
**********************************/
|
||||
#define cl_arm_import_memory 1
|
||||
|
||||
typedef intptr_t cl_import_properties_arm;
|
||||
|
||||
/* Default and valid proporties name for cl_arm_import_memory */
|
||||
#define CL_IMPORT_TYPE_ARM 0x40B2
|
||||
|
||||
/* Host process memory type default value for CL_IMPORT_TYPE_ARM property */
|
||||
#define CL_IMPORT_TYPE_HOST_ARM 0x40B3
|
||||
|
||||
/* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
|
||||
#define CL_IMPORT_TYPE_DMA_BUF_ARM 0x40B4
|
||||
|
||||
/* Protected memory property */
|
||||
#define CL_IMPORT_TYPE_PROTECTED_ARM 0x40B5
|
||||
|
||||
/* Android hardware buffer type value for CL_IMPORT_TYPE_ARM property */
|
||||
#define CL_IMPORT_TYPE_ANDROID_HARDWARE_BUFFER_ARM 0x41E2
|
||||
|
||||
/* Data consistency with host property */
|
||||
#define CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM 0x41E3
|
||||
|
||||
/* Import memory size value to indicate a size for the whole buffer */
|
||||
#define CL_IMPORT_MEMORY_WHOLE_ALLOCATION_ARM SIZE_MAX
|
||||
|
||||
/* This extension adds a new function that allows for direct memory import into
|
||||
* OpenCL via the clImportMemoryARM function.
|
||||
*
|
||||
* Memory imported through this interface will be mapped into the device's page
|
||||
* tables directly, providing zero copy access. It will never fall back to copy
|
||||
* operations and aliased buffers.
|
||||
*
|
||||
* Types of memory supported for import are specified as additional extension
|
||||
* strings.
|
||||
*
|
||||
* This extension produces cl_mem allocations which are compatible with all other
|
||||
* users of cl_mem in the standard API.
|
||||
*
|
||||
* This extension maps pages with the same properties as the normal buffer creation
|
||||
* function clCreateBuffer.
|
||||
*/
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clImportMemoryARM( cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_import_properties_arm *properties,
|
||||
void *memory,
|
||||
size_t size,
|
||||
cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/******************************************
|
||||
* cl_arm_shared_virtual_memory extension *
|
||||
******************************************/
|
||||
#define cl_arm_shared_virtual_memory 1
|
||||
|
||||
/* Used by clGetDeviceInfo */
|
||||
#define CL_DEVICE_SVM_CAPABILITIES_ARM 0x40B6
|
||||
|
||||
/* Used by clGetMemObjectInfo */
|
||||
#define CL_MEM_USES_SVM_POINTER_ARM 0x40B7
|
||||
|
||||
/* Used by clSetKernelExecInfoARM: */
|
||||
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM 0x40B8
|
||||
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM 0x40B9
|
||||
|
||||
/* To be used by clGetEventInfo: */
|
||||
#define CL_COMMAND_SVM_FREE_ARM 0x40BA
|
||||
#define CL_COMMAND_SVM_MEMCPY_ARM 0x40BB
|
||||
#define CL_COMMAND_SVM_MEMFILL_ARM 0x40BC
|
||||
#define CL_COMMAND_SVM_MAP_ARM 0x40BD
|
||||
#define CL_COMMAND_SVM_UNMAP_ARM 0x40BE
|
||||
|
||||
/* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. */
|
||||
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM (1 << 0)
|
||||
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM (1 << 1)
|
||||
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM (1 << 2)
|
||||
#define CL_DEVICE_SVM_ATOMICS_ARM (1 << 3)
|
||||
|
||||
/* Flag values used by clSVMAllocARM: */
|
||||
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM (1 << 10)
|
||||
#define CL_MEM_SVM_ATOMICS_ARM (1 << 11)
|
||||
|
||||
typedef cl_bitfield cl_svm_mem_flags_arm;
|
||||
typedef cl_uint cl_kernel_exec_info_arm;
|
||||
typedef cl_bitfield cl_device_svm_capabilities_arm;
|
||||
|
||||
extern CL_API_ENTRY void * CL_API_CALL
|
||||
clSVMAllocARM(cl_context context,
|
||||
cl_svm_mem_flags_arm flags,
|
||||
size_t size,
|
||||
cl_uint alignment) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY void CL_API_CALL
|
||||
clSVMFreeARM(cl_context context,
|
||||
void * svm_pointer) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueSVMFreeARM(cl_command_queue command_queue,
|
||||
cl_uint num_svm_pointers,
|
||||
void * svm_pointers[],
|
||||
void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue,
|
||||
cl_uint num_svm_pointers,
|
||||
void * svm_pointers[],
|
||||
void * user_data),
|
||||
void * user_data,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueSVMMemcpyARM(cl_command_queue command_queue,
|
||||
cl_bool blocking_copy,
|
||||
void * dst_ptr,
|
||||
const void * src_ptr,
|
||||
size_t size,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueSVMMemFillARM(cl_command_queue command_queue,
|
||||
void * svm_ptr,
|
||||
const void * pattern,
|
||||
size_t pattern_size,
|
||||
size_t size,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueSVMMapARM(cl_command_queue command_queue,
|
||||
cl_bool blocking_map,
|
||||
cl_map_flags flags,
|
||||
void * svm_ptr,
|
||||
size_t size,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueSVMUnmapARM(cl_command_queue command_queue,
|
||||
void * svm_ptr,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clSetKernelArgSVMPointerARM(cl_kernel kernel,
|
||||
cl_uint arg_index,
|
||||
const void * arg_value) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clSetKernelExecInfoARM(cl_kernel kernel,
|
||||
cl_kernel_exec_info_arm param_name,
|
||||
size_t param_value_size,
|
||||
const void * param_value) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
/********************************
|
||||
* cl_arm_get_core_id extension *
|
||||
********************************/
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
|
||||
#define cl_arm_get_core_id 1
|
||||
|
||||
/* Device info property for bitfield of cores present */
|
||||
#define CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM 0x40BF
|
||||
|
||||
#endif /* CL_VERSION_1_2 */
|
||||
|
||||
/*********************************
|
||||
* cl_arm_job_slot_selection
|
||||
*********************************/
|
||||
|
||||
#define cl_arm_job_slot_selection 1
|
||||
|
||||
/* cl_device_info */
|
||||
#define CL_DEVICE_JOB_SLOTS_ARM 0x41E0
|
||||
|
||||
/* cl_command_queue_properties */
|
||||
#define CL_QUEUE_JOB_SLOT_ARM 0x41E1
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* __CL_EXT_H */
|
||||
682
CL/cl_ext_intel.h
Normal file
682
CL/cl_ext_intel.h
Normal file
@ -0,0 +1,682 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
******************************************************************************/
|
||||
/*****************************************************************************\
|
||||
|
||||
Copyright (c) 2013-2020 Intel Corporation All Rights Reserved.
|
||||
|
||||
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
|
||||
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
File Name: cl_ext_intel.h
|
||||
|
||||
Abstract:
|
||||
|
||||
Notes:
|
||||
|
||||
\*****************************************************************************/
|
||||
|
||||
#ifndef __CL_EXT_INTEL_H
|
||||
#define __CL_EXT_INTEL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/***************************************
|
||||
* cl_intel_thread_local_exec extension *
|
||||
****************************************/
|
||||
|
||||
#define cl_intel_thread_local_exec 1
|
||||
|
||||
#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31)
|
||||
|
||||
/***********************************************
|
||||
* cl_intel_device_partition_by_names extension *
|
||||
************************************************/
|
||||
|
||||
#define cl_intel_device_partition_by_names 1
|
||||
|
||||
#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052
|
||||
#define CL_PARTITION_BY_NAMES_LIST_END_INTEL -1
|
||||
|
||||
/************************************************
|
||||
* cl_intel_accelerator extension *
|
||||
* cl_intel_motion_estimation extension *
|
||||
* cl_intel_advanced_motion_estimation extension *
|
||||
*************************************************/
|
||||
|
||||
#define cl_intel_accelerator 1
|
||||
#define cl_intel_motion_estimation 1
|
||||
#define cl_intel_advanced_motion_estimation 1
|
||||
|
||||
typedef struct _cl_accelerator_intel* cl_accelerator_intel;
|
||||
typedef cl_uint cl_accelerator_type_intel;
|
||||
typedef cl_uint cl_accelerator_info_intel;
|
||||
|
||||
typedef struct _cl_motion_estimation_desc_intel {
|
||||
cl_uint mb_block_type;
|
||||
cl_uint subpixel_mode;
|
||||
cl_uint sad_adjust_mode;
|
||||
cl_uint search_path_type;
|
||||
} cl_motion_estimation_desc_intel;
|
||||
|
||||
/* error codes */
|
||||
#define CL_INVALID_ACCELERATOR_INTEL -1094
|
||||
#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095
|
||||
#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096
|
||||
#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097
|
||||
|
||||
/* cl_accelerator_type_intel */
|
||||
#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0
|
||||
|
||||
/* cl_accelerator_info_intel */
|
||||
#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090
|
||||
#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091
|
||||
#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092
|
||||
#define CL_ACCELERATOR_TYPE_INTEL 0x4093
|
||||
|
||||
/* cl_motion_detect_desc_intel flags */
|
||||
#define CL_ME_MB_TYPE_16x16_INTEL 0x0
|
||||
#define CL_ME_MB_TYPE_8x8_INTEL 0x1
|
||||
#define CL_ME_MB_TYPE_4x4_INTEL 0x2
|
||||
|
||||
#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
|
||||
#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
|
||||
#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2
|
||||
|
||||
#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
|
||||
#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1
|
||||
|
||||
#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0
|
||||
#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1
|
||||
#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5
|
||||
|
||||
#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL 0x0
|
||||
#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1
|
||||
#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2
|
||||
#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL 0x4
|
||||
|
||||
#define CL_ME_FORWARD_INPUT_MODE_INTEL 0x1
|
||||
#define CL_ME_BACKWARD_INPUT_MODE_INTEL 0x2
|
||||
#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL 0x3
|
||||
|
||||
#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL 16
|
||||
#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL 21
|
||||
#define CL_ME_BIDIR_WEIGHT_HALF_INTEL 32
|
||||
#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 43
|
||||
#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 48
|
||||
|
||||
#define CL_ME_COST_PENALTY_NONE_INTEL 0x0
|
||||
#define CL_ME_COST_PENALTY_LOW_INTEL 0x1
|
||||
#define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2
|
||||
#define CL_ME_COST_PENALTY_HIGH_INTEL 0x3
|
||||
|
||||
#define CL_ME_COST_PRECISION_QPEL_INTEL 0x0
|
||||
#define CL_ME_COST_PRECISION_HPEL_INTEL 0x1
|
||||
#define CL_ME_COST_PRECISION_PEL_INTEL 0x2
|
||||
#define CL_ME_COST_PRECISION_DPEL_INTEL 0x3
|
||||
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
|
||||
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
|
||||
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
|
||||
|
||||
#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
|
||||
#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||
#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
|
||||
#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
|
||||
|
||||
/* cl_device_info */
|
||||
#define CL_DEVICE_ME_VERSION_INTEL 0x407E
|
||||
|
||||
#define CL_ME_VERSION_LEGACY_INTEL 0x0
|
||||
#define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1
|
||||
#define CL_ME_VERSION_ADVANCED_VER_2_INTEL 0x2
|
||||
|
||||
extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL
|
||||
clCreateAcceleratorINTEL(
|
||||
cl_context context,
|
||||
cl_accelerator_type_intel accelerator_type,
|
||||
size_t descriptor_size,
|
||||
const void* descriptor,
|
||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_accelerator_type_intel accelerator_type,
|
||||
size_t descriptor_size,
|
||||
const void* descriptor,
|
||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetAcceleratorInfoINTEL(
|
||||
cl_accelerator_intel accelerator,
|
||||
cl_accelerator_info_intel param_name,
|
||||
size_t param_value_size,
|
||||
void* param_value,
|
||||
size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)(
|
||||
cl_accelerator_intel accelerator,
|
||||
cl_accelerator_info_intel param_name,
|
||||
size_t param_value_size,
|
||||
void* param_value,
|
||||
size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clRetainAcceleratorINTEL(
|
||||
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)(
|
||||
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clReleaseAcceleratorINTEL(
|
||||
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)(
|
||||
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
/******************************************
|
||||
* cl_intel_simultaneous_sharing extension *
|
||||
*******************************************/
|
||||
|
||||
#define cl_intel_simultaneous_sharing 1
|
||||
|
||||
#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104
|
||||
#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105
|
||||
|
||||
/***********************************
|
||||
* cl_intel_egl_image_yuv extension *
|
||||
************************************/
|
||||
|
||||
#define cl_intel_egl_image_yuv 1
|
||||
|
||||
#define CL_EGL_YUV_PLANE_INTEL 0x4107
|
||||
|
||||
/********************************
|
||||
* cl_intel_packed_yuv extension *
|
||||
*********************************/
|
||||
|
||||
#define cl_intel_packed_yuv 1
|
||||
|
||||
#define CL_YUYV_INTEL 0x4076
|
||||
#define CL_UYVY_INTEL 0x4077
|
||||
#define CL_YVYU_INTEL 0x4078
|
||||
#define CL_VYUY_INTEL 0x4079
|
||||
|
||||
/********************************************
|
||||
* cl_intel_required_subgroup_size extension *
|
||||
*********************************************/
|
||||
|
||||
#define cl_intel_required_subgroup_size 1
|
||||
|
||||
#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108
|
||||
#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109
|
||||
#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A
|
||||
|
||||
/****************************************
|
||||
* cl_intel_driver_diagnostics extension *
|
||||
*****************************************/
|
||||
|
||||
#define cl_intel_driver_diagnostics 1
|
||||
|
||||
typedef cl_uint cl_diagnostics_verbose_level;
|
||||
|
||||
#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106
|
||||
|
||||
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL ( 0xff )
|
||||
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL ( 1 )
|
||||
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL ( 1 << 1 )
|
||||
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL ( 1 << 2 )
|
||||
|
||||
/********************************
|
||||
* cl_intel_planar_yuv extension *
|
||||
*********************************/
|
||||
|
||||
#define CL_NV12_INTEL 0x410E
|
||||
|
||||
#define CL_MEM_NO_ACCESS_INTEL ( 1 << 24 )
|
||||
#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ( 1 << 25 )
|
||||
|
||||
#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E
|
||||
#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F
|
||||
|
||||
/*******************************************************
|
||||
* cl_intel_device_side_avc_motion_estimation extension *
|
||||
********************************************************/
|
||||
|
||||
#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B
|
||||
#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C
|
||||
#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D
|
||||
|
||||
#define CL_AVC_ME_VERSION_0_INTEL 0x0 /* No support. */
|
||||
#define CL_AVC_ME_VERSION_1_INTEL 0x1 /* First supported version. */
|
||||
|
||||
#define CL_AVC_ME_MAJOR_16x16_INTEL 0x0
|
||||
#define CL_AVC_ME_MAJOR_16x8_INTEL 0x1
|
||||
#define CL_AVC_ME_MAJOR_8x16_INTEL 0x2
|
||||
#define CL_AVC_ME_MAJOR_8x8_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_MINOR_8x8_INTEL 0x0
|
||||
#define CL_AVC_ME_MINOR_8x4_INTEL 0x1
|
||||
#define CL_AVC_ME_MINOR_4x8_INTEL 0x2
|
||||
#define CL_AVC_ME_MINOR_4x4_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_MAJOR_FORWARD_INTEL 0x0
|
||||
#define CL_AVC_ME_MAJOR_BACKWARD_INTEL 0x1
|
||||
#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2
|
||||
|
||||
#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0
|
||||
#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E
|
||||
#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D
|
||||
#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B
|
||||
#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77
|
||||
#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F
|
||||
#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F
|
||||
#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F
|
||||
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL 0x9
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL 0x2
|
||||
#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL 0xa
|
||||
|
||||
#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
|
||||
#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2
|
||||
|
||||
#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
|
||||
#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
|
||||
#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0
|
||||
#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1
|
||||
#define CL_AVC_ME_COST_PRECISION_PEL_INTEL 0x2
|
||||
#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10
|
||||
#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15
|
||||
#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20
|
||||
#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B
|
||||
#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30
|
||||
|
||||
#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0
|
||||
#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2
|
||||
#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4
|
||||
#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8
|
||||
|
||||
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0
|
||||
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000
|
||||
|
||||
#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL ( 0x3 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL ( 0x55 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL ( 0xAA << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL ( 0xFF << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL ( 0x1 << 26 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL ( 0x2 << 26 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL ( 0x1 << 28 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL ( 0x2 << 28 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL ( 0x1 << 30 )
|
||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL ( 0x2 << 30 )
|
||||
|
||||
#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00
|
||||
#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80
|
||||
|
||||
#define CL_AVC_ME_INTRA_16x16_INTEL 0x0
|
||||
#define CL_AVC_ME_INTRA_8x8_INTEL 0x1
|
||||
#define CL_AVC_ME_INTRA_4x4_INTEL 0x2
|
||||
|
||||
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6
|
||||
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5
|
||||
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60
|
||||
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10
|
||||
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8
|
||||
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4
|
||||
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
|
||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
|
||||
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
|
||||
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
||||
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
|
||||
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_FRAME_FORWARD_INTEL 0x1
|
||||
#define CL_AVC_ME_FRAME_BACKWARD_INTEL 0x2
|
||||
#define CL_AVC_ME_FRAME_DUAL_INTEL 0x3
|
||||
|
||||
#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0
|
||||
#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1
|
||||
#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2
|
||||
|
||||
#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0
|
||||
#define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1
|
||||
|
||||
/*******************************************
|
||||
* cl_intel_unified_shared_memory extension *
|
||||
********************************************/
|
||||
|
||||
/* These APIs are in sync with Revision O of the cl_intel_unified_shared_memory spec! */
|
||||
|
||||
#define cl_intel_unified_shared_memory 1
|
||||
|
||||
/* cl_device_info */
|
||||
#define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL 0x4190
|
||||
#define CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL 0x4191
|
||||
#define CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4192
|
||||
#define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4193
|
||||
#define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL 0x4194
|
||||
|
||||
typedef cl_bitfield cl_device_unified_shared_memory_capabilities_intel;
|
||||
|
||||
/* cl_device_unified_shared_memory_capabilities_intel - bitfield */
|
||||
#define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL (1 << 0)
|
||||
#define CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL (1 << 1)
|
||||
#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL (1 << 2)
|
||||
#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL (1 << 3)
|
||||
|
||||
typedef cl_bitfield cl_mem_properties_intel;
|
||||
|
||||
/* cl_mem_properties_intel */
|
||||
#define CL_MEM_ALLOC_FLAGS_INTEL 0x4195
|
||||
|
||||
typedef cl_bitfield cl_mem_alloc_flags_intel;
|
||||
|
||||
/* cl_mem_alloc_flags_intel - bitfield */
|
||||
#define CL_MEM_ALLOC_WRITE_COMBINED_INTEL (1 << 0)
|
||||
|
||||
typedef cl_uint cl_mem_info_intel;
|
||||
|
||||
/* cl_mem_alloc_info_intel */
|
||||
#define CL_MEM_ALLOC_TYPE_INTEL 0x419A
|
||||
#define CL_MEM_ALLOC_BASE_PTR_INTEL 0x419B
|
||||
#define CL_MEM_ALLOC_SIZE_INTEL 0x419C
|
||||
#define CL_MEM_ALLOC_DEVICE_INTEL 0x419D
|
||||
/* Enum values 0x419E-0x419F are reserved for future queries. */
|
||||
|
||||
typedef cl_uint cl_unified_shared_memory_type_intel;
|
||||
|
||||
/* cl_unified_shared_memory_type_intel */
|
||||
#define CL_MEM_TYPE_UNKNOWN_INTEL 0x4196
|
||||
#define CL_MEM_TYPE_HOST_INTEL 0x4197
|
||||
#define CL_MEM_TYPE_DEVICE_INTEL 0x4198
|
||||
#define CL_MEM_TYPE_SHARED_INTEL 0x4199
|
||||
|
||||
typedef cl_uint cl_mem_advice_intel;
|
||||
|
||||
/* cl_mem_advice_intel */
|
||||
/* Enum values 0x4208-0x420F are reserved for future memory advices. */
|
||||
|
||||
/* cl_kernel_exec_info */
|
||||
#define CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL 0x4200
|
||||
#define CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL 0x4201
|
||||
#define CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL 0x4202
|
||||
#define CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL 0x4203
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_MEMFILL_INTEL 0x4204
|
||||
#define CL_COMMAND_MEMCPY_INTEL 0x4205
|
||||
#define CL_COMMAND_MIGRATEMEM_INTEL 0x4206
|
||||
#define CL_COMMAND_MEMADVISE_INTEL 0x4207
|
||||
|
||||
extern CL_API_ENTRY void* CL_API_CALL
|
||||
clHostMemAllocINTEL(
|
||||
cl_context context,
|
||||
const cl_mem_properties_intel* properties,
|
||||
size_t size,
|
||||
cl_uint alignment,
|
||||
cl_int* errcode_ret);
|
||||
|
||||
typedef CL_API_ENTRY void* (CL_API_CALL *
|
||||
clHostMemAllocINTEL_fn)(
|
||||
cl_context context,
|
||||
const cl_mem_properties_intel* properties,
|
||||
size_t size,
|
||||
cl_uint alignment,
|
||||
cl_int* errcode_ret);
|
||||
|
||||
extern CL_API_ENTRY void* CL_API_CALL
|
||||
clDeviceMemAllocINTEL(
|
||||
cl_context context,
|
||||
cl_device_id device,
|
||||
const cl_mem_properties_intel* properties,
|
||||
size_t size,
|
||||
cl_uint alignment,
|
||||
cl_int* errcode_ret);
|
||||
|
||||
typedef CL_API_ENTRY void* (CL_API_CALL *
|
||||
clDeviceMemAllocINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_device_id device,
|
||||
const cl_mem_properties_intel* properties,
|
||||
size_t size,
|
||||
cl_uint alignment,
|
||||
cl_int* errcode_ret);
|
||||
|
||||
extern CL_API_ENTRY void* CL_API_CALL
|
||||
clSharedMemAllocINTEL(
|
||||
cl_context context,
|
||||
cl_device_id device,
|
||||
const cl_mem_properties_intel* properties,
|
||||
size_t size,
|
||||
cl_uint alignment,
|
||||
cl_int* errcode_ret);
|
||||
|
||||
typedef CL_API_ENTRY void* (CL_API_CALL *
|
||||
clSharedMemAllocINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_device_id device,
|
||||
const cl_mem_properties_intel* properties,
|
||||
size_t size,
|
||||
cl_uint alignment,
|
||||
cl_int* errcode_ret);
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clMemFreeINTEL(
|
||||
cl_context context,
|
||||
void* ptr);
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *
|
||||
clMemFreeINTEL_fn)(
|
||||
cl_context context,
|
||||
void* ptr);
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetMemAllocInfoINTEL(
|
||||
cl_context context,
|
||||
const void* ptr,
|
||||
cl_mem_info_intel param_name,
|
||||
size_t param_value_size,
|
||||
void* param_value,
|
||||
size_t* param_value_size_ret);
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *
|
||||
clGetMemAllocInfoINTEL_fn)(
|
||||
cl_context context,
|
||||
const void* ptr,
|
||||
cl_mem_info_intel param_name,
|
||||
size_t param_value_size,
|
||||
void* param_value,
|
||||
size_t* param_value_size_ret);
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clSetKernelArgMemPointerINTEL(
|
||||
cl_kernel kernel,
|
||||
cl_uint arg_index,
|
||||
const void* arg_value);
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *
|
||||
clSetKernelArgMemPointerINTEL_fn)(
|
||||
cl_kernel kernel,
|
||||
cl_uint arg_index,
|
||||
const void* arg_value);
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueMemsetINTEL( /* Deprecated */
|
||||
cl_command_queue command_queue,
|
||||
void* dst_ptr,
|
||||
cl_int value,
|
||||
size_t size,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event);
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *
|
||||
clEnqueueMemsetINTEL_fn)( /* Deprecated */
|
||||
cl_command_queue command_queue,
|
||||
void* dst_ptr,
|
||||
cl_int value,
|
||||
size_t size,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event);
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueMemFillINTEL(
|
||||
cl_command_queue command_queue,
|
||||
void* dst_ptr,
|
||||
const void* pattern,
|
||||
size_t pattern_size,
|
||||
size_t size,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event);
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *
|
||||
clEnqueueMemFillINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
void* dst_ptr,
|
||||
const void* pattern,
|
||||
size_t pattern_size,
|
||||
size_t size,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event);
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueMemcpyINTEL(
|
||||
cl_command_queue command_queue,
|
||||
cl_bool blocking,
|
||||
void* dst_ptr,
|
||||
const void* src_ptr,
|
||||
size_t size,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event);
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *
|
||||
clEnqueueMemcpyINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_bool blocking,
|
||||
void* dst_ptr,
|
||||
const void* src_ptr,
|
||||
size_t size,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event);
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
|
||||
/* Because these APIs use cl_mem_migration_flags, they require
|
||||
OpenCL 1.2: */
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueMigrateMemINTEL(
|
||||
cl_command_queue command_queue,
|
||||
const void* ptr,
|
||||
size_t size,
|
||||
cl_mem_migration_flags flags,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event);
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *
|
||||
clEnqueueMigrateMemINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
const void* ptr,
|
||||
size_t size,
|
||||
cl_mem_migration_flags flags,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event);
|
||||
|
||||
#endif
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueMemAdviseINTEL(
|
||||
cl_command_queue command_queue,
|
||||
const void* ptr,
|
||||
size_t size,
|
||||
cl_mem_advice_intel advice,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event);
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *
|
||||
clEnqueueMemAdviseINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
const void* ptr,
|
||||
size_t size,
|
||||
cl_mem_advice_intel advice,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __CL_EXT_INTEL_H */
|
||||
159
CL/cl_gl.h
Normal file
159
CL/cl_gl.h
Normal file
@ -0,0 +1,159 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_GL_H
|
||||
#define __OPENCL_CL_GL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef cl_uint cl_gl_object_type;
|
||||
typedef cl_uint cl_gl_texture_info;
|
||||
typedef cl_uint cl_gl_platform_info;
|
||||
typedef struct __GLsync *cl_GLsync;
|
||||
|
||||
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
|
||||
#define CL_GL_OBJECT_BUFFER 0x2000
|
||||
#define CL_GL_OBJECT_TEXTURE2D 0x2001
|
||||
#define CL_GL_OBJECT_TEXTURE3D 0x2002
|
||||
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
|
||||
#ifdef CL_VERSION_1_2
|
||||
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
|
||||
#define CL_GL_OBJECT_TEXTURE1D 0x200F
|
||||
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
|
||||
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
|
||||
#endif
|
||||
|
||||
/* cl_gl_texture_info */
|
||||
#define CL_GL_TEXTURE_TARGET 0x2004
|
||||
#define CL_GL_MIPMAP_LEVEL 0x2005
|
||||
#ifdef CL_VERSION_1_2
|
||||
#define CL_GL_NUM_SAMPLES 0x2012
|
||||
#endif
|
||||
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLBuffer(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLuint bufobj,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
#endif
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromGLRenderbuffer(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLuint renderbuffer,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLObjectInfo(cl_mem memobj,
|
||||
cl_gl_object_type * gl_object_type,
|
||||
cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLTextureInfo(cl_mem memobj,
|
||||
cl_gl_texture_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireGLObjects(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseGLObjects(cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem * mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event * event_wait_list,
|
||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/* Deprecated OpenCL 1.1 APIs */
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture2D(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
||||
clCreateFromGLTexture3D(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
cl_GLenum target,
|
||||
cl_GLint miplevel,
|
||||
cl_GLuint texture,
|
||||
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
||||
|
||||
/* cl_khr_gl_sharing extension */
|
||||
|
||||
#define cl_khr_gl_sharing 1
|
||||
|
||||
typedef cl_uint cl_gl_context_info;
|
||||
|
||||
/* Additional Error Codes */
|
||||
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
|
||||
|
||||
/* cl_gl_context_info */
|
||||
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
|
||||
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
|
||||
|
||||
/* Additional cl_context_properties */
|
||||
#define CL_GL_CONTEXT_KHR 0x2008
|
||||
#define CL_EGL_DISPLAY_KHR 0x2009
|
||||
#define CL_GLX_DISPLAY_KHR 0x200A
|
||||
#define CL_WGL_HDC_KHR 0x200B
|
||||
#define CL_CGL_SHAREGROUP_KHR 0x200C
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetGLContextInfoKHR(const cl_context_properties * properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
|
||||
const cl_context_properties * properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size,
|
||||
void * param_value,
|
||||
size_t * param_value_size_ret);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_GL_H */
|
||||
40
CL/cl_gl_ext.h
Normal file
40
CL/cl_gl_ext.h
Normal file
@ -0,0 +1,40 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_CL_GL_EXT_H
|
||||
#define __OPENCL_CL_GL_EXT_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <CL/cl_gl.h>
|
||||
|
||||
/*
|
||||
* cl_khr_gl_event extension
|
||||
*/
|
||||
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
|
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
||||
clCreateEventFromGLsyncKHR(cl_context context,
|
||||
cl_GLsync cl_GLsync,
|
||||
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_GL_EXT_H */
|
||||
440
CL/cl_half.h
Normal file
440
CL/cl_half.h
Normal file
@ -0,0 +1,440 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2019-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
/**
|
||||
* This is a header-only utility library that provides OpenCL host code with
|
||||
* routines for converting to/from cl_half values.
|
||||
*
|
||||
* Example usage:
|
||||
*
|
||||
* #include <CL/cl_half.h>
|
||||
* ...
|
||||
* cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE);
|
||||
* cl_float f = cl_half_to_float(h);
|
||||
*/
|
||||
|
||||
#ifndef OPENCL_CL_HALF_H
|
||||
#define OPENCL_CL_HALF_H
|
||||
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Rounding mode used when converting to cl_half.
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
CL_HALF_RTE, // round to nearest even
|
||||
CL_HALF_RTZ, // round towards zero
|
||||
CL_HALF_RTP, // round towards positive infinity
|
||||
CL_HALF_RTN, // round towards negative infinity
|
||||
} cl_half_rounding_mode;
|
||||
|
||||
|
||||
/* Private utility macros. */
|
||||
#define CL_HALF_EXP_MASK 0x7C00
|
||||
#define CL_HALF_MAX_FINITE_MAG 0x7BFF
|
||||
|
||||
|
||||
/*
|
||||
* Utility to deal with values that overflow when converting to half precision.
|
||||
*/
|
||||
static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode,
|
||||
uint16_t sign)
|
||||
{
|
||||
if (rounding_mode == CL_HALF_RTZ)
|
||||
{
|
||||
// Round overflow towards zero -> largest finite number (preserving sign)
|
||||
return (sign << 15) | CL_HALF_MAX_FINITE_MAG;
|
||||
}
|
||||
else if (rounding_mode == CL_HALF_RTP && sign)
|
||||
{
|
||||
// Round negative overflow towards positive infinity -> most negative finite number
|
||||
return (1 << 15) | CL_HALF_MAX_FINITE_MAG;
|
||||
}
|
||||
else if (rounding_mode == CL_HALF_RTN && !sign)
|
||||
{
|
||||
// Round positive overflow towards negative infinity -> largest finite number
|
||||
return CL_HALF_MAX_FINITE_MAG;
|
||||
}
|
||||
|
||||
// Overflow to infinity
|
||||
return (sign << 15) | CL_HALF_EXP_MASK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Utility to deal with values that underflow when converting to half precision.
|
||||
*/
|
||||
static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode,
|
||||
uint16_t sign)
|
||||
{
|
||||
if (rounding_mode == CL_HALF_RTP && !sign)
|
||||
{
|
||||
// Round underflow towards positive infinity -> smallest positive value
|
||||
return (sign << 15) | 1;
|
||||
}
|
||||
else if (rounding_mode == CL_HALF_RTN && sign)
|
||||
{
|
||||
// Round underflow towards negative infinity -> largest negative value
|
||||
return (sign << 15) | 1;
|
||||
}
|
||||
|
||||
// Flush to zero
|
||||
return (sign << 15);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert a cl_float to a cl_half.
|
||||
*/
|
||||
static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode)
|
||||
{
|
||||
// Type-punning to get direct access to underlying bits
|
||||
union
|
||||
{
|
||||
cl_float f;
|
||||
uint32_t i;
|
||||
} f32;
|
||||
f32.f = f;
|
||||
|
||||
// Extract sign bit
|
||||
uint16_t sign = f32.i >> 31;
|
||||
|
||||
// Extract FP32 exponent and mantissa
|
||||
uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF;
|
||||
uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1);
|
||||
|
||||
// Remove FP32 exponent bias
|
||||
int32_t exp = f_exp - CL_FLT_MAX_EXP + 1;
|
||||
|
||||
// Add FP16 exponent bias
|
||||
uint16_t h_exp = exp + CL_HALF_MAX_EXP - 1;
|
||||
|
||||
// Position of the bit that will become the FP16 mantissa LSB
|
||||
uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG;
|
||||
|
||||
// Check for NaN / infinity
|
||||
if (f_exp == 0xFF)
|
||||
{
|
||||
if (f_mant)
|
||||
{
|
||||
// NaN -> propagate mantissa and silence it
|
||||
uint16_t h_mant = f_mant >> lsb_pos;
|
||||
h_mant |= 0x200;
|
||||
return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Infinity -> zero mantissa
|
||||
return (sign << 15) | CL_HALF_EXP_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for zero
|
||||
if (!f_exp && !f_mant)
|
||||
{
|
||||
return (sign << 15);
|
||||
}
|
||||
|
||||
// Check for overflow
|
||||
if (exp >= CL_HALF_MAX_EXP)
|
||||
{
|
||||
return cl_half_handle_overflow(rounding_mode, sign);
|
||||
}
|
||||
|
||||
// Check for underflow
|
||||
if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
|
||||
{
|
||||
return cl_half_handle_underflow(rounding_mode, sign);
|
||||
}
|
||||
|
||||
// Check for value that will become denormal
|
||||
if (exp < -14)
|
||||
{
|
||||
// Denormal -> include the implicit 1 from the FP32 mantissa
|
||||
h_exp = 0;
|
||||
f_mant |= 1 << (CL_FLT_MANT_DIG - 1);
|
||||
|
||||
// Mantissa shift amount depends on exponent
|
||||
lsb_pos = -exp + (CL_FLT_MANT_DIG - 25);
|
||||
}
|
||||
|
||||
// Generate FP16 mantissa by shifting FP32 mantissa
|
||||
uint16_t h_mant = f_mant >> lsb_pos;
|
||||
|
||||
// Check whether we need to round
|
||||
uint32_t halfway = 1 << (lsb_pos - 1);
|
||||
uint32_t mask = (halfway << 1) - 1;
|
||||
switch (rounding_mode)
|
||||
{
|
||||
case CL_HALF_RTE:
|
||||
if ((f_mant & mask) > halfway)
|
||||
{
|
||||
// More than halfway -> round up
|
||||
h_mant += 1;
|
||||
}
|
||||
else if ((f_mant & mask) == halfway)
|
||||
{
|
||||
// Exactly halfway -> round to nearest even
|
||||
if (h_mant & 0x1)
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
case CL_HALF_RTZ:
|
||||
// Mantissa has already been truncated -> do nothing
|
||||
break;
|
||||
case CL_HALF_RTP:
|
||||
if ((f_mant & mask) && !sign)
|
||||
{
|
||||
// Round positive numbers up
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
case CL_HALF_RTN:
|
||||
if ((f_mant & mask) && sign)
|
||||
{
|
||||
// Round negative numbers down
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Check for mantissa overflow
|
||||
if (h_mant & 0x400)
|
||||
{
|
||||
h_exp += 1;
|
||||
h_mant = 0;
|
||||
}
|
||||
|
||||
return (sign << 15) | (h_exp << 10) | h_mant;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert a cl_double to a cl_half.
|
||||
*/
|
||||
static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode)
|
||||
{
|
||||
// Type-punning to get direct access to underlying bits
|
||||
union
|
||||
{
|
||||
cl_double d;
|
||||
uint64_t i;
|
||||
} f64;
|
||||
f64.d = d;
|
||||
|
||||
// Extract sign bit
|
||||
uint16_t sign = f64.i >> 63;
|
||||
|
||||
// Extract FP64 exponent and mantissa
|
||||
uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF;
|
||||
uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1);
|
||||
|
||||
// Remove FP64 exponent bias
|
||||
int64_t exp = d_exp - CL_DBL_MAX_EXP + 1;
|
||||
|
||||
// Add FP16 exponent bias
|
||||
uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
|
||||
|
||||
// Position of the bit that will become the FP16 mantissa LSB
|
||||
uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG;
|
||||
|
||||
// Check for NaN / infinity
|
||||
if (d_exp == 0x7FF)
|
||||
{
|
||||
if (d_mant)
|
||||
{
|
||||
// NaN -> propagate mantissa and silence it
|
||||
uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
|
||||
h_mant |= 0x200;
|
||||
return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Infinity -> zero mantissa
|
||||
return (sign << 15) | CL_HALF_EXP_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for zero
|
||||
if (!d_exp && !d_mant)
|
||||
{
|
||||
return (sign << 15);
|
||||
}
|
||||
|
||||
// Check for overflow
|
||||
if (exp >= CL_HALF_MAX_EXP)
|
||||
{
|
||||
return cl_half_handle_overflow(rounding_mode, sign);
|
||||
}
|
||||
|
||||
// Check for underflow
|
||||
if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
|
||||
{
|
||||
return cl_half_handle_underflow(rounding_mode, sign);
|
||||
}
|
||||
|
||||
// Check for value that will become denormal
|
||||
if (exp < -14)
|
||||
{
|
||||
// Include the implicit 1 from the FP64 mantissa
|
||||
h_exp = 0;
|
||||
d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1);
|
||||
|
||||
// Mantissa shift amount depends on exponent
|
||||
lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25));
|
||||
}
|
||||
|
||||
// Generate FP16 mantissa by shifting FP64 mantissa
|
||||
uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
|
||||
|
||||
// Check whether we need to round
|
||||
uint64_t halfway = (uint64_t)1 << (lsb_pos - 1);
|
||||
uint64_t mask = (halfway << 1) - 1;
|
||||
switch (rounding_mode)
|
||||
{
|
||||
case CL_HALF_RTE:
|
||||
if ((d_mant & mask) > halfway)
|
||||
{
|
||||
// More than halfway -> round up
|
||||
h_mant += 1;
|
||||
}
|
||||
else if ((d_mant & mask) == halfway)
|
||||
{
|
||||
// Exactly halfway -> round to nearest even
|
||||
if (h_mant & 0x1)
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
case CL_HALF_RTZ:
|
||||
// Mantissa has already been truncated -> do nothing
|
||||
break;
|
||||
case CL_HALF_RTP:
|
||||
if ((d_mant & mask) && !sign)
|
||||
{
|
||||
// Round positive numbers up
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
case CL_HALF_RTN:
|
||||
if ((d_mant & mask) && sign)
|
||||
{
|
||||
// Round negative numbers down
|
||||
h_mant += 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Check for mantissa overflow
|
||||
if (h_mant & 0x400)
|
||||
{
|
||||
h_exp += 1;
|
||||
h_mant = 0;
|
||||
}
|
||||
|
||||
return (sign << 15) | (h_exp << 10) | h_mant;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert a cl_half to a cl_float.
|
||||
*/
|
||||
static inline cl_float cl_half_to_float(cl_half h)
|
||||
{
|
||||
// Type-punning to get direct access to underlying bits
|
||||
union
|
||||
{
|
||||
cl_float f;
|
||||
uint32_t i;
|
||||
} f32;
|
||||
|
||||
// Extract sign bit
|
||||
uint16_t sign = h >> 15;
|
||||
|
||||
// Extract FP16 exponent and mantissa
|
||||
uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
||||
uint16_t h_mant = h & 0x3FF;
|
||||
|
||||
// Remove FP16 exponent bias
|
||||
int32_t exp = h_exp - CL_HALF_MAX_EXP + 1;
|
||||
|
||||
// Add FP32 exponent bias
|
||||
uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1;
|
||||
|
||||
// Check for NaN / infinity
|
||||
if (h_exp == 0x1F)
|
||||
{
|
||||
if (h_mant)
|
||||
{
|
||||
// NaN -> propagate mantissa and silence it
|
||||
uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);
|
||||
f_mant |= 0x400000;
|
||||
f32.i = (sign << 31) | 0x7F800000 | f_mant;
|
||||
return f32.f;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Infinity -> zero mantissa
|
||||
f32.i = (sign << 31) | 0x7F800000;
|
||||
return f32.f;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for zero / denormal
|
||||
if (h_exp == 0)
|
||||
{
|
||||
if (h_mant == 0)
|
||||
{
|
||||
// Zero -> zero exponent
|
||||
f_exp = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Denormal -> normalize it
|
||||
// - Shift mantissa to make most-significant 1 implicit
|
||||
// - Adjust exponent accordingly
|
||||
uint32_t shift = 0;
|
||||
while ((h_mant & 0x400) == 0)
|
||||
{
|
||||
h_mant <<= 1;
|
||||
shift++;
|
||||
}
|
||||
h_mant &= 0x3FF;
|
||||
f_exp -= shift - 1;
|
||||
}
|
||||
}
|
||||
|
||||
f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13);
|
||||
return f32.f;
|
||||
}
|
||||
|
||||
|
||||
#undef CL_HALF_EXP_MASK
|
||||
#undef CL_HALF_MAX_FINITE_MAG
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* OPENCL_CL_HALF_H */
|
||||
1287
CL/cl_icd.h
Normal file
1287
CL/cl_icd.h
Normal file
File diff suppressed because it is too large
Load Diff
1384
CL/cl_platform.h
Normal file
1384
CL/cl_platform.h
Normal file
File diff suppressed because it is too large
Load Diff
160
CL/cl_va_api_media_sharing_intel.h
Normal file
160
CL/cl_va_api_media_sharing_intel.h
Normal file
@ -0,0 +1,160 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
/*****************************************************************************\
|
||||
|
||||
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
|
||||
|
||||
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
|
||||
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
File Name: cl_va_api_media_sharing_intel.h
|
||||
|
||||
Abstract:
|
||||
|
||||
Notes:
|
||||
|
||||
\*****************************************************************************/
|
||||
|
||||
|
||||
#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
|
||||
#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
#include <va/va.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/******************************************
|
||||
* cl_intel_va_api_media_sharing extension *
|
||||
*******************************************/
|
||||
|
||||
#define cl_intel_va_api_media_sharing 1
|
||||
|
||||
/* error codes */
|
||||
#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098
|
||||
#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099
|
||||
#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100
|
||||
#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101
|
||||
|
||||
/* cl_va_api_device_source_intel */
|
||||
#define CL_VA_API_DISPLAY_INTEL 0x4094
|
||||
|
||||
/* cl_va_api_device_set_intel */
|
||||
#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095
|
||||
#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096
|
||||
|
||||
/* cl_context_info */
|
||||
#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098
|
||||
|
||||
/* cl_image_info */
|
||||
#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A
|
||||
#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B
|
||||
|
||||
typedef cl_uint cl_va_api_device_source_intel;
|
||||
typedef cl_uint cl_va_api_device_set_intel;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
|
||||
cl_platform_id platform,
|
||||
cl_va_api_device_source_intel media_adapter_type,
|
||||
void* media_adapter,
|
||||
cl_va_api_device_set_intel media_adapter_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)(
|
||||
cl_platform_id platform,
|
||||
cl_va_api_device_source_intel media_adapter_type,
|
||||
void* media_adapter,
|
||||
cl_va_api_device_set_intel media_adapter_set,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
||||
clCreateFromVA_APIMediaSurfaceINTEL(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
VASurfaceID* surface,
|
||||
cl_uint plane,
|
||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)(
|
||||
cl_context context,
|
||||
cl_mem_flags flags,
|
||||
VASurfaceID* surface,
|
||||
cl_uint plane,
|
||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueAcquireVA_APIMediaSurfacesINTEL(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
||||
clEnqueueReleaseVA_APIMediaSurfacesINTEL(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)(
|
||||
cl_command_queue command_queue,
|
||||
cl_uint num_objects,
|
||||
const cl_mem* mem_objects,
|
||||
cl_uint num_events_in_wait_list,
|
||||
const cl_event* event_wait_list,
|
||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */
|
||||
|
||||
81
CL/cl_version.h
Normal file
81
CL/cl_version.h
Normal file
@ -0,0 +1,81 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2018-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __CL_VERSION_H
|
||||
#define __CL_VERSION_H
|
||||
|
||||
/* Detect which version to target */
|
||||
#if !defined(CL_TARGET_OPENCL_VERSION)
|
||||
#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)")
|
||||
#define CL_TARGET_OPENCL_VERSION 220
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION != 100 && \
|
||||
CL_TARGET_OPENCL_VERSION != 110 && \
|
||||
CL_TARGET_OPENCL_VERSION != 120 && \
|
||||
CL_TARGET_OPENCL_VERSION != 200 && \
|
||||
CL_TARGET_OPENCL_VERSION != 210 && \
|
||||
CL_TARGET_OPENCL_VERSION != 220 && \
|
||||
CL_TARGET_OPENCL_VERSION != 300
|
||||
#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220, 300). Defaulting to 220 (OpenCL 2.2)")
|
||||
#undef CL_TARGET_OPENCL_VERSION
|
||||
#define CL_TARGET_OPENCL_VERSION 220
|
||||
#endif
|
||||
|
||||
|
||||
/* OpenCL Version */
|
||||
#if CL_TARGET_OPENCL_VERSION >= 300 && !defined(CL_VERSION_3_0)
|
||||
#define CL_VERSION_3_0 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2)
|
||||
#define CL_VERSION_2_2 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1)
|
||||
#define CL_VERSION_2_1 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0)
|
||||
#define CL_VERSION_2_0 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2)
|
||||
#define CL_VERSION_1_2 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1)
|
||||
#define CL_VERSION_1_1 1
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0)
|
||||
#define CL_VERSION_1_0 1
|
||||
#endif
|
||||
|
||||
/* Allow deprecated APIs for older OpenCL versions. */
|
||||
#if CL_TARGET_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_2_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_1_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
#endif
|
||||
#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS)
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_0_APIS
|
||||
#endif
|
||||
|
||||
#endif /* __CL_VERSION_H */
|
||||
33
CL/opencl.h
Normal file
33
CL/opencl.h
Normal file
@ -0,0 +1,33 @@
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
******************************************************************************/
|
||||
|
||||
#ifndef __OPENCL_H
|
||||
#define __OPENCL_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <CL/cl_gl.h>
|
||||
#include <CL/cl_gl_ext.h>
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __OPENCL_H */
|
||||
190
CMakeLists.txt
Normal file
190
CMakeLists.txt
Normal file
@ -0,0 +1,190 @@
|
||||
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
||||
|
||||
project(BabelStream VERSION 3.5 LANGUAGES CXX)
|
||||
|
||||
#set(CMAKE_VERBOSE_MAKEFILE ON)
|
||||
|
||||
# some nicer defaults for standard C++
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
|
||||
#set(MODEL SYCL)
|
||||
#set(SYCL_COMPILER COMPUTECPP)
|
||||
#set(SYCL_COMPILER_DIR /home/tom/Desktop/computecpp_archive/ComputeCpp-CE-2.3.0-x86_64-linux-gnu)
|
||||
#set(MODEL RAJA)
|
||||
#set(RAJA_IN_TREE /home/tom/Downloads/RAJA-v0.13.0/)
|
||||
#set(ENABLE_CUDA ON)
|
||||
#set(TARGET NVIDIA)
|
||||
#set(CUDA_TOOLKIT_ROOT_DIR /opt/cuda-11.2)
|
||||
#set(CUDA_ARCH sm_70)
|
||||
#set(BLT_DIR /home/tom/Downloads/blt-0.3.6/)
|
||||
|
||||
#set(MODEL STD)
|
||||
#set(ARCH cc70)
|
||||
#set(CXX_EXTRA_FLAGS -v)
|
||||
|
||||
#set(MODEL CUDA)
|
||||
#set(ARCH sm_70)
|
||||
#set(CMAKE_CUDA_COMPILER /opt/cuda-11.2/bin/nvcc)
|
||||
|
||||
#set(MODEL OCL)
|
||||
#set(OpenCL_LIBRARY /opt/rocm-4.0.0/opencl/lib/libOpenCL.so)
|
||||
#set(OpenCL_INCLUDE_DIR /opt/rocm-4.0.0/opencl/lib)
|
||||
#set(RELEASE_FLAGS -Ofast)
|
||||
#set(CXX_EXTRA_FLAGS -O2)
|
||||
|
||||
#set(CMAKE_CXX_COMPILER /usr/lib/aomp/bin/clang++)
|
||||
#set(MODEL OMP)
|
||||
##set(OFFLOAD "AMD:gfx803")
|
||||
#set(OFFLOAD "NVIDIA:sm_35")
|
||||
#set(CXX_EXTRA_FLAGS --cuda-path=/opt/cuda-10.2/)
|
||||
|
||||
#set(OFFLOAD "AMD:_70")
|
||||
#set(CXX_EXTRA_FLAGS --cuda-path=/opt/cuda-10.2/ --gcc-toolchain=/home/tom/spack/opt/spack/linux-fedora33-zen2/gcc-10.2.1/gcc-8.3.0-latmjo2hl2yv53255xkwko7k3y7bx2vv)
|
||||
#set(CXX_EXTRA_LINKER_FLAGS )
|
||||
#set(MODEL HIP)
|
||||
|
||||
#set(MODEL KOKKOS)
|
||||
#set(KOKKOS_IN_TREE /home/tom/Downloads/kokkos-3.3.00/)
|
||||
|
||||
# the final executable name
|
||||
set(EXE_NAME babelstream)
|
||||
|
||||
# select default build type
|
||||
set(CMAKE_BUILD_TYPE "Release")
|
||||
# for chrono and some basic CXX features, models can overwrite this if required
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
|
||||
if (NOT CMAKE_BUILD_TYPE)
|
||||
message("No CMAKE_BUILD_TYPE specified, defaulting to 'Release'")
|
||||
set(CMAKE_BUILD_TYPE "Release")
|
||||
endif ()
|
||||
|
||||
string(TOUPPER ${CMAKE_BUILD_TYPE} BUILD_TYPE)
|
||||
if ((NOT BUILD_TYPE STREQUAL RELEASE) AND (NOT BUILD_TYPE STREQUAL DEBUG))
|
||||
message(FATAL_ERROR "Only Release or Debug is supported, got `${CMAKE_BUILD_TYPE}`")
|
||||
endif ()
|
||||
|
||||
# setup some defaults flags for everything
|
||||
set(DEFAULT_DEBUG_FLAGS -O2 -fno-omit-frame-pointer)
|
||||
set(DEFAULT_RELEASE_FLAGS -O3 -march=native)
|
||||
|
||||
macro(hint_flag FLAG DESCRIPTION)
|
||||
if (NOT DEFINED ${FLAG})
|
||||
message(STATUS "${FLAG}: ${DESCRIPTION}")
|
||||
else ()
|
||||
# i.e. `-DFOO="-a -b"` becomes CMake's semicolon separated list `FOO=`-a;-b`
|
||||
separate_arguments(${FLAG})
|
||||
endif ()
|
||||
endmacro()
|
||||
|
||||
# hint common extra flag options for all models if they are not set
|
||||
hint_flag(CXX_EXTRA_FLAGS "
|
||||
Appends to common compile flags. These will be appended at link phase as well.
|
||||
To use separate flags at link phase, set `CXX_EXTRA_LINK_FLAGS`")
|
||||
hint_flag(CXX_EXTRA_LINK_FLAGS "
|
||||
Appends to link flags which appear *before* the objects.
|
||||
Do not use this for linking libraries, as the link line is order-dependent")
|
||||
hint_flag(CXX_EXTRA_LIBRARIES "
|
||||
Append to link flags which appear *after* the objects.
|
||||
Use this for linking extra libraries (e.g `-lmylib`, or simply `mylib`)")
|
||||
hint_flag(CXX_EXTRA_LINKER_FLAGS "
|
||||
Append to linker flags (i.e GCC's `-Wl` or equivalent)")
|
||||
|
||||
# copy CXX_EXTRA_FLAGS <- CXX_EXTRA_LINK_FLAGS
|
||||
if ((DEFINED CXX_EXTRA_FLAGS) AND (NOT DEFINED CXX_EXTRA_LINK_FLAGS))
|
||||
set(CXX_EXTRA_LINK_FLAGS ${CXX_EXTRA_FLAGS})
|
||||
endif ()
|
||||
|
||||
# include our macros
|
||||
include(register_models.cmake)
|
||||
|
||||
# register out models <model_name> <preprocessor_def_name> <source files...>
|
||||
register_model(OMP OMP OMPStream.cpp)
|
||||
register_model(OCL OCL OCLStream.cpp)
|
||||
register_model(STD STD STDStream.cpp)
|
||||
register_model(STD20 STD20 STD20Stream.cpp)
|
||||
register_model(HIP HIP HIPStream.cpp)
|
||||
register_model(CUDA CUDA CUDAStream.cu)
|
||||
register_model(KOKKOS KOKKOS KokkosStream.cpp)
|
||||
register_model(SYCL SYCL SYCLStream.cpp)
|
||||
register_model(ACC ACC ACCStream.cpp)
|
||||
# defining RAJA collides with the RAJA namespace so USE_RAJA
|
||||
register_model(RAJA USE_RAJA RAJAStream.cpp)
|
||||
|
||||
|
||||
set(USAGE ON CACHE BOOL "Whether to print all custom flags for the selected model")
|
||||
|
||||
message(STATUS "Available models: ${REGISTERED_MODELS}")
|
||||
if (NOT DEFINED MODEL)
|
||||
message(FATAL_ERROR "MODEL is unspecified, pick one from the available models")
|
||||
else ()
|
||||
message(STATUS "Selected model : ${MODEL}")
|
||||
endif ()
|
||||
|
||||
# load the $MODEL.cmake file and setup the correct IMPL_* based on $MODEL
|
||||
load_model(${MODEL})
|
||||
|
||||
if (USAGE)
|
||||
# print the usage of the registered flag options
|
||||
registered_flags_action(print RESULT)
|
||||
message(STATUS "${RESULT}")
|
||||
endif ()
|
||||
# check required/set default for all registered flag options
|
||||
registered_flags_action(check RESULT)
|
||||
message(STATUS "${RESULT}")
|
||||
|
||||
# run model specific setup, i.e append build flags, etc
|
||||
setup()
|
||||
|
||||
# CMake insists that -O2 (or equivalent) is the universally accepted optimisation level
|
||||
# we remove that here and use our own <BUILD_TYPE>_FLAGS
|
||||
wipe_gcc_style_optimisation_flags(CMAKE_CXX_FLAGS_${BUILD_TYPE})
|
||||
|
||||
message(STATUS "Default ${CMAKE_BUILD_TYPE} flags are `${DEFAULT_${BUILD_TYPE}_FLAGS}`, set ${BUILD_TYPE}_FLAGS to override (CXX_EXTRA_* flags are not affected)")
|
||||
|
||||
# setup common build flag defaults if there are no overrides
|
||||
if (NOT DEFINED ${BUILD_TYPE}_FLAGS)
|
||||
set(ACTUAL_${BUILD_TYPE}_FLAGS ${DEFAULT_${BUILD_TYPE}_FLAGS})
|
||||
elseif()
|
||||
set(ACTUAL_${BUILD_TYPE}_FLAGS ${${BUILD_TYPE}_FLAGS})
|
||||
endif ()
|
||||
|
||||
|
||||
message(STATUS "CXX vendor : ${CMAKE_CXX_COMPILER_ID} (${CMAKE_CXX_COMPILER})")
|
||||
message(STATUS "Platform : ${CMAKE_SYSTEM_PROCESSOR}")
|
||||
message(STATUS "Sources : ${IMPL_SOURCES}")
|
||||
message(STATUS "Libraries : ${LINK_LIBRARIES}")
|
||||
message(STATUS "CXX Flags : ${CMAKE_CXX_FLAGS_${BUILD_TYPE}} ${ACTUAL_${BUILD_TYPE}_FLAGS} ${CXX_EXTRA_FLAGS}
|
||||
CXX flags derived from (CMake + (Override ? Override : Default) + Extras), where:
|
||||
CMake = `${CMAKE_CXX_FLAGS_${BUILD_TYPE}}`
|
||||
Default = `${DEFAULT_${BUILD_TYPE}_FLAGS}`
|
||||
Override (RELEASE_FLAGS) = `${${BUILD_TYPE}_FLAGS}`
|
||||
Extras (CXX_EXTRA_FLAGS) = `${CXX_EXTRA_FLAGS}`")
|
||||
message(STATUS "Link Flags : ${LINK_FLAGS} ${CXX_EXTRA_LINK_FLAGS}")
|
||||
message(STATUS "Linker Flags: ${CMAKE_EXE_LINKER_FLAGS} ${CXX_EXTRA_LINKER_FLAGS} ")
|
||||
message(STATUS "Defs : ${IMPL_DEFINITIONS}")
|
||||
message(STATUS "Executable : ${EXE_NAME}")
|
||||
|
||||
# below we have all the usual CMake target setup steps
|
||||
|
||||
add_executable(${EXE_NAME} ${IMPL_SOURCES} main.cpp)
|
||||
target_link_libraries(${EXE_NAME} PUBLIC ${LINK_LIBRARIES})
|
||||
target_compile_definitions(${EXE_NAME} PUBLIC ${IMPL_DEFINITIONS})
|
||||
|
||||
if (CXX_EXTRA_LIBRARIES)
|
||||
target_link_libraries(${EXE_NAME} PUBLIC ${CXX_EXTRA_LIBRARIES})
|
||||
endif ()
|
||||
|
||||
target_compile_options(${EXE_NAME} PUBLIC "$<$<CONFIG:Release>:${ACTUAL_RELEASE_FLAGS};${CXX_EXTRA_FLAGS}>")
|
||||
target_compile_options(${EXE_NAME} PUBLIC "$<$<CONFIG:Debug>:${ACTUAL_DEBUG_FLAGS};${CXX_EXTRA_FLAGS}>")
|
||||
|
||||
target_link_options(${EXE_NAME} PUBLIC LINKER:${CXX_EXTRA_LINKER_FLAGS})
|
||||
target_link_options(${EXE_NAME} PUBLIC ${LINK_FLAGS} ${CXX_EXTRA_LINK_FLAGS})
|
||||
|
||||
# some models require the target to be already specified so they can finish their setup here
|
||||
# this only happens if the MODEL.cmake definition contains the `setup_target` macro
|
||||
if (COMMAND setup_target)
|
||||
setup_target(${EXE_NAME})
|
||||
endif ()
|
||||
43
CUDA.cmake
Normal file
43
CUDA.cmake
Normal file
@ -0,0 +1,43 @@
|
||||
|
||||
register_flag_optional(CMAKE_CXX_COMPILER
|
||||
"Any CXX compiler that is supported by CMake detection, this is used for host compilation"
|
||||
"c++")
|
||||
|
||||
register_flag_optional(MEM "Device memory mode:
|
||||
DEFAULT - allocate host and device memory pointers.
|
||||
MANAGED - use CUDA Managed Memory.
|
||||
PAGEFAULT - shared memory, only host pointers allocated."
|
||||
"DEFAULT")
|
||||
|
||||
register_flag_required(CMAKE_CUDA_COMPILER
|
||||
"Path to the CUDA nvcc compiler")
|
||||
|
||||
# XXX we may want to drop this eventually and use CMAKE_CUDA_ARCHITECTURES directly
|
||||
register_flag_required(CUDA_ARCH
|
||||
"Nvidia architecture, will be passed in via `-arch=` (e.g `sm_70`) for nvcc")
|
||||
|
||||
register_flag_optional(CUDA_EXTRA_FLAGS
|
||||
"Additional CUDA flags passed to nvcc, this is appended after `CUDA_ARCH`"
|
||||
"")
|
||||
|
||||
|
||||
macro(setup)
|
||||
|
||||
# XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes
|
||||
if(POLICY CMP0104)
|
||||
cmake_policy(SET CMP0104 OLD)
|
||||
endif()
|
||||
|
||||
enable_language(CUDA)
|
||||
register_definitions(MEM=${MEM})
|
||||
|
||||
# add -forward-unknown-to-host-compiler for compatibility reasons
|
||||
set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "-forward-unknown-to-host-compiler -arch=${CUDA_ARCH}" ${CUDA_EXTRA_FLAGS})
|
||||
|
||||
# CMake defaults to -O2 for CUDA at Release, let's wipe that and use the global RELEASE_FLAG
|
||||
# appended later
|
||||
wipe_gcc_style_optimisation_flags(CMAKE_CUDA_FLAGS_${BUILD_TYPE})
|
||||
|
||||
message(STATUS "NVCC flags: ${CMAKE_CUDA_FLAGS} ${CMAKE_CUDA_FLAGS_${BUILD_TYPE}}")
|
||||
endmacro()
|
||||
|
||||
7
HIP.cmake
Normal file
7
HIP.cmake
Normal file
@ -0,0 +1,7 @@
|
||||
|
||||
register_flag_required(CMAKE_CXX_COMPILER
|
||||
"Absolute path to the AMD HIP C++ compiler")
|
||||
|
||||
macro(setup)
|
||||
# nothing to do here as hipcc does everything correctly, what a surprise!
|
||||
endmacro()
|
||||
40
KOKKOS.cmake
Normal file
40
KOKKOS.cmake
Normal file
@ -0,0 +1,40 @@
|
||||
|
||||
register_flag_optional(CMAKE_CXX_COMPILER
|
||||
"Any CXX compiler that is supported by CMake detection and RAJA.
|
||||
See https://github.com/kokkos/kokkos#primary-tested-compilers-on-x86-are"
|
||||
"c++")
|
||||
|
||||
register_flag_required(KOKKOS_IN_TREE
|
||||
"Absolute path to the *source* distribution directory of Kokkos.
|
||||
Remember to append Kokkos specific flags as well, for example:
|
||||
|
||||
-DKOKKOS_IN_TREE=... -DKokkos_ENABLE_OPENMP=ON -DKokkos_ARCH_ZEN=ON ...
|
||||
|
||||
See https://github.com/kokkos/kokkos/blob/master/BUILD.md for all available options")
|
||||
|
||||
# compiler vendor and arch specific flags
|
||||
set(KOKKOS_FLAGS_CPU_INTEL -qopt-streaming-stores=always)
|
||||
|
||||
macro(setup)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
cmake_policy(SET CMP0074 NEW) #see https://github.com/kokkos/kokkos/blob/master/BUILD.md
|
||||
|
||||
message(STATUS "Building using in-tree Kokkos source at `${KOKKOS_IN_TREE}`")
|
||||
|
||||
if (EXISTS "${KOKKOS_IN_TREE}")
|
||||
add_subdirectory(${KOKKOS_IN_TREE} ${CMAKE_BINARY_DIR}/kokkos)
|
||||
register_link_library(Kokkos::kokkos)
|
||||
else ()
|
||||
message(FATAL_ERROR "`${KOKKOS_IN_TREE}` does not exist")
|
||||
endif ()
|
||||
|
||||
register_append_compiler_and_arch_specific_cxx_flags(
|
||||
KOKKOS_FLAGS_CPU
|
||||
${CMAKE_CXX_COMPILER_ID}
|
||||
${CMAKE_SYSTEM_PROCESSOR}
|
||||
)
|
||||
|
||||
endmacro()
|
||||
|
||||
|
||||
17
OCL.cmake
Normal file
17
OCL.cmake
Normal file
@ -0,0 +1,17 @@
|
||||
|
||||
register_flag_optional(CMAKE_CXX_COMPILER
|
||||
"Any CXX compiler that is supported by CMake detection"
|
||||
"c++")
|
||||
|
||||
register_flag_optional(OpenCL_LIBRARY
|
||||
"Path to OpenCL library, usually called libOpenCL.so"
|
||||
"${OpenCL_LIBRARY}")
|
||||
|
||||
|
||||
macro(setup)
|
||||
# don't point to the CL dir as the imports already have the CL prefix
|
||||
set(OpenCL_INCLUDE_DIR "${CMAKE_SOURCE_DIR}")
|
||||
find_package(OpenCL REQUIRED)
|
||||
register_link_library(OpenCL::OpenCL)
|
||||
endmacro()
|
||||
|
||||
174
OMP.cmake
Normal file
174
OMP.cmake
Normal file
@ -0,0 +1,174 @@
|
||||
# Compiler ID for reference (as of CMake 3.13)
|
||||
# Absoft = Absoft Fortran (absoft.com)
|
||||
# ADSP = Analog VisualDSP++ (analog.com)
|
||||
# AppleClang = Apple Clang (apple.com)
|
||||
# ARMCC = ARM Compiler (arm.com)
|
||||
# Bruce = Bruce C Compiler
|
||||
# CCur = Concurrent Fortran (ccur.com)
|
||||
# Clang = LLVM Clang (clang.llvm.org)
|
||||
# Cray = Cray Compiler (cray.com)
|
||||
# Embarcadero, Borland = Embarcadero (embarcadero.com)
|
||||
# G95 = G95 Fortran (g95.org)
|
||||
# GNU = GNU Compiler Collection (gcc.gnu.org)
|
||||
# HP = Hewlett-Packard Compiler (hp.com)
|
||||
# IAR = IAR Systems (iar.com)
|
||||
# Intel = Intel Compiler (intel.com)
|
||||
# MIPSpro = SGI MIPSpro (sgi.com)
|
||||
# MSVC = Microsoft Visual Studio (microsoft.com)
|
||||
# NVIDIA = NVIDIA CUDA Compiler (nvidia.com)
|
||||
# OpenWatcom = Open Watcom (openwatcom.org)
|
||||
# PGI = The Portland Group (pgroup.com)
|
||||
# Flang = Flang Fortran Compiler
|
||||
# PathScale = PathScale (pathscale.com)
|
||||
# SDCC = Small Device C Compiler (sdcc.sourceforge.net)
|
||||
# SunPro = Oracle Solaris Studio (oracle.com)
|
||||
# TI = Texas Instruments (ti.com)
|
||||
# TinyCC = Tiny C Compiler (tinycc.org)
|
||||
# XL, VisualAge, zOS = IBM XL (ibm.com)
|
||||
|
||||
# These are only added in CMake 3.15:
|
||||
# ARMClang = ARM Compiler based on Clang (arm.com)
|
||||
# These are only added in CMake 3.20:
|
||||
# NVHPC = NVIDIA HPC SDK Compiler (nvidia.com)
|
||||
|
||||
|
||||
# CMAKE_SYSTEM_PROCESSOR is set via `uname -p`, we have:
|
||||
# Power9 = ppc64le
|
||||
# x64 = x86_64
|
||||
# arm64 = aarch64
|
||||
#
|
||||
|
||||
|
||||
#predefined offload flags based on compiler id
|
||||
|
||||
|
||||
set(OMP_FLAGS_OFFLOAD_INTEL
|
||||
-qnextgen -fiopenmp -fopenmp-targets=spir64)
|
||||
set(OMP_FLAGS_OFFLOAD_GNU_NVIDIA
|
||||
-foffload=nvptx-none)
|
||||
set(OMP_FLAGS_OFFLOAD_GNU_AMD
|
||||
-foffload=amdgcn-amdhsa)
|
||||
set(OMP_FLAGS_OFFLOAD_CLANG_NVIDIA
|
||||
-fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda)
|
||||
set(OMP_FLAGS_OFFLOAD_CLANG_AMD
|
||||
-fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa)
|
||||
set(OMP_FLAGS_OFFLOAD_CLANG_ARCH_FLAG
|
||||
-march=) # prefix only, arch appended by the vendor:arch tuple
|
||||
|
||||
|
||||
set(OMP_FLAGS_CPU_INTEL
|
||||
-qopt-streaming-stores=always)
|
||||
set(OMP_FLAGS_CPU_GNU_PPC64LE
|
||||
-mcpu=native)
|
||||
set(OMP_FLAGS_CPU_XL
|
||||
-O5 -qarch=auto -qtune=auto)
|
||||
|
||||
# NEC
|
||||
set(OMP_FLAGS_CPU_NEC -O4 -finline)
|
||||
|
||||
register_flag_optional(CMAKE_CXX_COMPILER
|
||||
"Any CXX compiler that supports OpenMP as per CMake detection (and offloading if enabled with `OFFLOAD`)"
|
||||
"c++")
|
||||
|
||||
register_flag_optional(ARCH
|
||||
"This overrides CMake's CMAKE_SYSTEM_PROCESSOR detection which uses (uname -p), this is mainly for use with
|
||||
specialised accelerators only and not to be confused with offload which is is mutually exclusive with this.
|
||||
Supported values are:
|
||||
- NEC"
|
||||
"")
|
||||
|
||||
register_flag_optional(OFFLOAD
|
||||
"Whether to use OpenMP offload, the format is <VENDOR:ARCH?>|ON|OFF.
|
||||
We support a small set of known offload flags for clang, gcc, and icpx.
|
||||
However, as offload support is rapidly evolving, we recommend you directly supply them via OFFLOAD_FLAGS.
|
||||
For example:
|
||||
* OFFLOAD=NVIDIA:sm_60
|
||||
* OFFLOAD=AMD:gfx906
|
||||
* OFFLOAD=INTEL
|
||||
* OFFLOAD=ON OFFLOAD_FLAGS=..."
|
||||
OFF)
|
||||
|
||||
register_flag_optional(OFFLOAD_FLAGS
|
||||
"If OFFLOAD is enabled, this *overrides* the default offload flags"
|
||||
"")
|
||||
|
||||
register_flag_optional(OFFLOAD_APPEND_LINK_FLAG
|
||||
"If enabled, this appends all resolved offload flags (OFFLOAD=<vendor:arch> or directly from OFFLOAD_FLAGS) to the link flags.
|
||||
This is required for most offload implementations so that offload libraries can linked correctly."
|
||||
ON)
|
||||
|
||||
|
||||
macro(setup)
|
||||
find_package(OpenMP REQUIRED)
|
||||
register_link_library(OpenMP::OpenMP_CXX)
|
||||
|
||||
string(TOUPPER ${CMAKE_CXX_COMPILER_ID} COMPILER)
|
||||
if(NOT ARCH)
|
||||
string(TOUPPER ${CMAKE_SYSTEM_PROCESSOR} ARCH)
|
||||
else()
|
||||
message(STATUS "Using custom arch: ${ARCH}")
|
||||
endif()
|
||||
|
||||
|
||||
|
||||
if (("${OFFLOAD}" STREQUAL OFF) OR (NOT DEFINED OFFLOAD))
|
||||
# no offload
|
||||
|
||||
# resolve the CPU specific flags
|
||||
# starting with ${COMPILER_VENDOR}_${PLATFORM_ARCH}, then try ${COMPILER_VENDOR}, and then give up
|
||||
register_append_compiler_and_arch_specific_cxx_flags(
|
||||
OMP_FLAGS_CPU
|
||||
${COMPILER}
|
||||
${ARCH}
|
||||
)
|
||||
|
||||
elseif ("${OFFLOAD}" STREQUAL ON)
|
||||
# offload but with custom flags
|
||||
register_definitions(OMP_TARGET_GPU)
|
||||
separate_arguments(OFFLOAD_FLAGS)
|
||||
set(OMP_FLAGS ${OFFLOAD_FLAGS})
|
||||
elseif ((DEFINED OFFLOAD) AND OFFLOAD_FLAGS)
|
||||
# offload but OFFLOAD_FLAGS overrides
|
||||
register_definitions(OMP_TARGET_GPU)
|
||||
separate_arguments(OFFLOAD_FLAGS)
|
||||
list(OMP_FLAGS APPEND ${OFFLOAD_FLAGS})
|
||||
else ()
|
||||
|
||||
# handle the vendor:arch value
|
||||
string(REPLACE ":" ";" OFFLOAD_TUPLE "${OFFLOAD}")
|
||||
|
||||
list(LENGTH OFFLOAD_TUPLE LEN)
|
||||
if (LEN EQUAL 1)
|
||||
# offload with <vendor> tuple
|
||||
list(GET OFFLOAD_TUPLE 0 OFFLOAD_VENDOR)
|
||||
# append OMP_FLAGS_OFFLOAD_<vendor> if exists
|
||||
list(APPEND OMP_FLAGS ${OMP_FLAGS_OFFLOAD_${OFFLOAD_VENDOR}})
|
||||
|
||||
elseif (LEN EQUAL 2)
|
||||
# offload with <vendor:arch> tuple
|
||||
list(GET OFFLOAD_TUPLE 0 OFFLOAD_VENDOR)
|
||||
list(GET OFFLOAD_TUPLE 1 OFFLOAD_ARCH)
|
||||
|
||||
# append OMP_FLAGS_OFFLOAD_<compiler>_<vendor> if exists
|
||||
list(APPEND OMP_FLAGS ${OMP_FLAGS_OFFLOAD_${COMPILER}_${OFFLOAD_VENDOR}})
|
||||
# append offload arch if OMP_FLAGS_OFFLOAD_<compiler>_ARCH_FLAG if exists
|
||||
if (DEFINED OMP_FLAGS_OFFLOAD_${COMPILER}_ARCH_FLAG)
|
||||
list(APPEND OMP_FLAGS
|
||||
"${OMP_FLAGS_OFFLOAD_${COMPILER}_ARCH_FLAG}${OFFLOAD_ARCH}")
|
||||
endif ()
|
||||
else ()
|
||||
message(FATAL_ERROR "Unrecognised OFFLOAD format: `${OFFLOAD}`, consider directly using OFFLOAD_FLAGS")
|
||||
endif ()
|
||||
|
||||
endif ()
|
||||
|
||||
|
||||
message(STATUS "OMP CXX flags : ${OMP_FLAGS}")
|
||||
message(STATUS "OMP Link flags : ${OMP_LINK_FLAGS}")
|
||||
# propagate flags to linker so that it links with the offload stuff as well
|
||||
register_append_cxx_flags(ANY ${OMP_FLAGS})
|
||||
if (OFFLOAD_APPEND_LINK_FLAG)
|
||||
register_append_link_flags(${OMP_FLAGS})
|
||||
endif ()
|
||||
endmacro()
|
||||
|
||||
98
RAJA.cmake
Normal file
98
RAJA.cmake
Normal file
@ -0,0 +1,98 @@
|
||||
|
||||
register_flag_optional(CMAKE_CXX_COMPILER
|
||||
"Any CXX compiler that is supported by CMake detection and RAJA.
|
||||
See https://raja.readthedocs.io/en/main/getting_started.html#build-and-install"
|
||||
"c++")
|
||||
|
||||
register_flag_required(RAJA_IN_TREE
|
||||
"Absolute path to the *source* distribution directory of RAJA.
|
||||
Make sure to use the release version of RAJA or clone RAJA recursively with submodules.
|
||||
Remember to append RAJA specific flags as well, for example:
|
||||
|
||||
-DRAJA_IN_TREE=... -DENABLE_OPENMP=ON -DENABLE_CUDA=ON ...
|
||||
|
||||
See https://github.com/LLNL/RAJA/blob/08cbbafd2d21589ebf341f7275c229412d0fe903/CMakeLists.txt#L44 for all available options
|
||||
")
|
||||
|
||||
register_flag_optional(TARGET
|
||||
"Target offload device, implemented values are CPU, NVIDIA"
|
||||
CPU)
|
||||
|
||||
register_flag_optional(CUDA_TOOLKIT_ROOT_DIR
|
||||
"[TARGET==NVIDIA only] Path to the CUDA toolkit directory (e.g `/opt/cuda-11.2`) if the ENABLE_CUDA flag is specified for RAJA" "")
|
||||
|
||||
# XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes
|
||||
register_flag_optional(CUDA_ARCH
|
||||
"[TARGET==NVIDIA only] Nvidia architecture, will be passed in via `-arch=` (e.g `sm_70`) for nvcc"
|
||||
"")
|
||||
|
||||
register_flag_optional(CUDA_EXTRA_FLAGS
|
||||
"[TARGET==NVIDIA only] Additional CUDA flags passed to nvcc, this is appended after `CUDA_ARCH`"
|
||||
"")
|
||||
|
||||
# compiler vendor and arch specific flags
|
||||
set(RAJA_FLAGS_CPU_INTEL -qopt-streaming-stores=always)
|
||||
|
||||
macro(setup)
|
||||
|
||||
|
||||
if (${TARGET} STREQUAL "CPU")
|
||||
register_definitions(RAJA_TARGET_CPU)
|
||||
else ()
|
||||
register_definitions(RAJA_TARGET_GPU)
|
||||
endif ()
|
||||
|
||||
|
||||
if (EXISTS "${RAJA_IN_TREE}")
|
||||
|
||||
message(STATUS "Building using in-tree RAJA source at `${RAJA_IN_TREE}`")
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
# don't build anything that isn't the RAJA library itself, by default their cmake def builds everything, whyyy?
|
||||
set(ENABLE_TESTS OFF CACHE BOOL "")
|
||||
set(ENABLE_EXAMPLES OFF CACHE BOOL "")
|
||||
set(ENABLE_REPRODUCERS OFF CACHE BOOL "")
|
||||
set(ENABLE_EXERCISES OFF CACHE BOOL "")
|
||||
set(ENABLE_DOCUMENTATION OFF CACHE BOOL "")
|
||||
set(ENABLE_BENCHMARKS OFF CACHE BOOL "")
|
||||
set(ENABLE_CUDA ${ENABLE_CUDA} CACHE BOOL "" FORCE)
|
||||
|
||||
if (ENABLE_CUDA)
|
||||
|
||||
# XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes
|
||||
if(POLICY CMP0104)
|
||||
cmake_policy(SET CMP0104 OLD)
|
||||
endif()
|
||||
|
||||
# RAJA needs all the cuda stuff setup before including!
|
||||
set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc)
|
||||
set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "-forward-unknown-to-host-compiler -extended-lambda -arch=${CUDA_ARCH}" ${CUDA_EXTRA_FLAGS})
|
||||
list(APPEND CMAKE_CUDA_FLAGS)
|
||||
|
||||
message(STATUS "NVCC flags: ${CMAKE_CUDA_FLAGS}")
|
||||
endif ()
|
||||
|
||||
add_subdirectory(${RAJA_IN_TREE} ${CMAKE_BINARY_DIR}/raja)
|
||||
register_link_library(RAJA)
|
||||
# RAJA's cmake screws with where the binary will end up, resetting it here:
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||
else ()
|
||||
message(FATAL_ERROR "`${RAJA_IN_TREE}` does not exist")
|
||||
endif ()
|
||||
|
||||
|
||||
if (ENABLE_CUDA)
|
||||
# RAJA needs the codebase to be compiled with nvcc, so we tell cmake to treat sources as *.cu
|
||||
enable_language(CUDA)
|
||||
set_source_files_properties(RAJAStream.cpp PROPERTIES LANGUAGE CUDA)
|
||||
set_source_files_properties(main.cpp PROPERTIES LANGUAGE CUDA)
|
||||
endif ()
|
||||
|
||||
|
||||
register_append_compiler_and_arch_specific_cxx_flags(
|
||||
RAJA_FLAGS_CPU
|
||||
${CMAKE_CXX_COMPILER_ID}
|
||||
${CMAKE_SYSTEM_PROCESSOR}
|
||||
)
|
||||
|
||||
endmacro()
|
||||
@ -5,10 +5,10 @@
|
||||
// For full license terms please see the LICENSE file distributed with this
|
||||
// source code
|
||||
|
||||
#include <stdexcept>
|
||||
#include "RAJAStream.hpp"
|
||||
|
||||
using RAJA::forall;
|
||||
using RAJA::RangeSegment;
|
||||
|
||||
#ifndef ALIGNMENT
|
||||
#define ALIGNMENT (2*1024*1024) // 2MB
|
||||
@ -16,10 +16,8 @@ using RAJA::RangeSegment;
|
||||
|
||||
template <class T>
|
||||
RAJAStream<T>::RAJAStream(const int ARRAY_SIZE, const int device_index)
|
||||
: array_size(ARRAY_SIZE)
|
||||
: array_size(ARRAY_SIZE), range(0, ARRAY_SIZE)
|
||||
{
|
||||
RangeSegment seg(0, ARRAY_SIZE);
|
||||
index_set.push_back(seg);
|
||||
|
||||
#ifdef RAJA_TARGET_CPU
|
||||
d_a = (T*)aligned_alloc(ALIGNMENT, sizeof(T)*array_size);
|
||||
@ -53,7 +51,7 @@ void RAJAStream<T>::init_arrays(T initA, T initB, T initC)
|
||||
T* RAJA_RESTRICT a = d_a;
|
||||
T* RAJA_RESTRICT b = d_b;
|
||||
T* RAJA_RESTRICT c = d_c;
|
||||
forall<policy>(index_set, [=] RAJA_DEVICE (RAJA::Index_type index)
|
||||
forall<policy>(range, [=] RAJA_DEVICE (RAJA::Index_type index)
|
||||
{
|
||||
a[index] = initA;
|
||||
b[index] = initB;
|
||||
@ -75,7 +73,7 @@ void RAJAStream<T>::copy()
|
||||
{
|
||||
T* RAJA_RESTRICT a = d_a;
|
||||
T* RAJA_RESTRICT c = d_c;
|
||||
forall<policy>(index_set, [=] RAJA_DEVICE (RAJA::Index_type index)
|
||||
forall<policy>(range, [=] RAJA_DEVICE (RAJA::Index_type index)
|
||||
{
|
||||
c[index] = a[index];
|
||||
});
|
||||
@ -87,7 +85,7 @@ void RAJAStream<T>::mul()
|
||||
T* RAJA_RESTRICT b = d_b;
|
||||
T* RAJA_RESTRICT c = d_c;
|
||||
const T scalar = startScalar;
|
||||
forall<policy>(index_set, [=] RAJA_DEVICE (RAJA::Index_type index)
|
||||
forall<policy>(range, [=] RAJA_DEVICE (RAJA::Index_type index)
|
||||
{
|
||||
b[index] = scalar*c[index];
|
||||
});
|
||||
@ -99,7 +97,7 @@ void RAJAStream<T>::add()
|
||||
T* RAJA_RESTRICT a = d_a;
|
||||
T* RAJA_RESTRICT b = d_b;
|
||||
T* RAJA_RESTRICT c = d_c;
|
||||
forall<policy>(index_set, [=] RAJA_DEVICE (RAJA::Index_type index)
|
||||
forall<policy>(range, [=] RAJA_DEVICE (RAJA::Index_type index)
|
||||
{
|
||||
c[index] = a[index] + b[index];
|
||||
});
|
||||
@ -112,12 +110,20 @@ void RAJAStream<T>::triad()
|
||||
T* RAJA_RESTRICT b = d_b;
|
||||
T* RAJA_RESTRICT c = d_c;
|
||||
const T scalar = startScalar;
|
||||
forall<policy>(index_set, [=] RAJA_DEVICE (RAJA::Index_type index)
|
||||
forall<policy>(range, [=] RAJA_DEVICE (RAJA::Index_type index)
|
||||
{
|
||||
a[index] = b[index] + scalar*c[index];
|
||||
});
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void RAJAStream<T>::nstream()
|
||||
{
|
||||
// TODO implement me!
|
||||
std::cerr << "Not implemented yet!" << std::endl;
|
||||
throw std::runtime_error("Not implemented yet!");
|
||||
}
|
||||
|
||||
template <class T>
|
||||
T RAJAStream<T>::dot()
|
||||
{
|
||||
@ -126,7 +132,7 @@ T RAJAStream<T>::dot()
|
||||
|
||||
RAJA::ReduceSum<reduce_policy, T> sum(0.0);
|
||||
|
||||
forall<policy>(index_set, [=] RAJA_DEVICE (RAJA::Index_type index)
|
||||
forall<policy>(range, [=] RAJA_DEVICE (RAJA::Index_type index)
|
||||
{
|
||||
sum += a[index] * b[index];
|
||||
});
|
||||
|
||||
@ -8,34 +8,41 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include "RAJA/RAJA.hxx"
|
||||
#include "RAJA/RAJA.hpp"
|
||||
|
||||
#include "Stream.h"
|
||||
|
||||
#define IMPLEMENTATION_STRING "RAJA"
|
||||
|
||||
#ifdef RAJA_TARGET_CPU
|
||||
typedef RAJA::IndexSet::ExecPolicy<
|
||||
RAJA::seq_segit,
|
||||
RAJA::omp_parallel_for_exec> policy;
|
||||
// TODO verify old and new templates are semantically equal
|
||||
//typedef RAJA::ExecPolicy<
|
||||
// RAJA::seq_segit,
|
||||
// RAJA::omp_parallel_for_exec> policy;
|
||||
|
||||
typedef RAJA::omp_parallel_for_exec policy;
|
||||
typedef RAJA::omp_reduce reduce_policy;
|
||||
#else
|
||||
const size_t block_size = 128;
|
||||
typedef RAJA::IndexSet::ExecPolicy<
|
||||
RAJA::seq_segit,
|
||||
RAJA::cuda_exec<block_size>> policy;
|
||||
typedef RAJA::cuda_reduce<block_size> reduce_policy;
|
||||
// TODO verify old and new templates are semantically equal
|
||||
//typedef RAJA::IndexSet::ExecPolicy<
|
||||
// RAJA::seq_segit,
|
||||
// RAJA::cuda_exec<block_size>> policy;
|
||||
//typedef RAJA::cuda_reduce<block_size> reduce_policy;
|
||||
typedef RAJA::cuda_exec<block_size> policy;
|
||||
typedef RAJA::cuda_reduce reduce_policy;
|
||||
#endif
|
||||
|
||||
using RAJA::RangeSegment;
|
||||
|
||||
|
||||
template <class T>
|
||||
class RAJAStream : public Stream<T>
|
||||
{
|
||||
protected:
|
||||
// Size of arrays
|
||||
int array_size;
|
||||
|
||||
// Contains iteration space
|
||||
RAJA::IndexSet index_set;
|
||||
const int array_size;
|
||||
const RangeSegment range;
|
||||
|
||||
// Device side pointers to arrays
|
||||
T* d_a;
|
||||
@ -51,6 +58,7 @@ class RAJAStream : public Stream<T>
|
||||
virtual void add() override;
|
||||
virtual void mul() override;
|
||||
virtual void triad() override;
|
||||
virtual void nstream() override;
|
||||
virtual T dot() override;
|
||||
|
||||
virtual void init_arrays(T initA, T initB, T initC) override;
|
||||
|
||||
53
README.md
53
README.md
@ -57,6 +57,53 @@ Usage
|
||||
|
||||
Drivers, compiler and software applicable to whichever implementation you would like to build against is required.
|
||||
|
||||
### CMake
|
||||
|
||||
The project supports building with CMake >= 3.13.0, it can be installed without root via the [official script](https://cmake.org/download/).
|
||||
As with any CMake project, first configure the project:
|
||||
|
||||
```shell
|
||||
> cd babelstream
|
||||
> cmake -Bbuild -H. -DMODEL=<model> <model specific flags prefixed with -D...> # configure the build, build type defaults to Release
|
||||
> cmake --build build # compile it
|
||||
> ./build/babelstream # executable available at ./build/
|
||||
```
|
||||
|
||||
By default, we have defined a set of optimal flags for known HPC compilers.
|
||||
There are assigned those to `RELEASE_FLAGS`, and you can override them if required.
|
||||
|
||||
To find out what flag each model supports or requires, simply configure while only specifying the model.
|
||||
For example:
|
||||
```shell
|
||||
> cd babelstream
|
||||
> cmake -Bbuild -H. -DMODEL=OCL
|
||||
...
|
||||
- Common Release flags are `-O3`, set RELEASE_FLAGS to override
|
||||
-- CXX_EXTRA_FLAGS:
|
||||
Appends to common compile flags. These will be used at link phase at well.
|
||||
To use separate flags at link time, set `CXX_EXTRA_LINKER_FLAGS`
|
||||
-- CXX_EXTRA_LINK_FLAGS:
|
||||
Appends to link flags which appear *before* the objects.
|
||||
Do not use this for linking libraries, as the link line is order-dependent
|
||||
-- CXX_EXTRA_LIBRARIES:
|
||||
Append to link flags which appears *after* the objects.
|
||||
Use this for linking extra libraries (e.g `-lmylib`, or simply `mylib`)
|
||||
-- CXX_EXTRA_LINKER_FLAGS:
|
||||
Append to linker flags (i.e GCC's `-Wl` or equivalent)
|
||||
-- Available models: OMP;OCL;STD;STD20;HIP;CUDA;KOKKOS;SYCL;ACC;RAJA
|
||||
-- Selected model : OCL
|
||||
-- Supported flags:
|
||||
|
||||
CMAKE_CXX_COMPILER (optional, default=c++): Any CXX compiler that is supported by CMake detection
|
||||
OpenCL_LIBRARY (optional, default=): Path to OpenCL library, usually called libOpenCL.so
|
||||
...
|
||||
```
|
||||
Alternatively, refer to the [CI script](./ci-test-compile.sh), which test-compiles most of the models, and see which flags are used there.
|
||||
|
||||
*It is recommended that you delete the `build` directory when you change any of the build flags.*
|
||||
|
||||
### GNU Make
|
||||
|
||||
We have supplied a series of Makefiles, one for each programming model, to assist with building.
|
||||
The Makefiles contain common build options, and should be simple to customise for your needs too.
|
||||
|
||||
@ -76,8 +123,7 @@ This project also contains implementations in alternative languages with differe
|
||||
> ./target/release/rust-stream
|
||||
```
|
||||
|
||||
Building Kokkos
|
||||
---------------
|
||||
#### Building Kokkos for Make
|
||||
|
||||
Kokkos version >= 3 requires setting the `KOKKOS_PATH` flag to the *source* directory of a distribution.
|
||||
For example:
|
||||
@ -91,8 +137,7 @@ make -f Kokkos.make KOKKOS_PATH=~/kokkos-3.1.01
|
||||
```
|
||||
See make output for more information on supported flags.
|
||||
|
||||
Building RAJA
|
||||
-------------
|
||||
#### Building RAJA for Make
|
||||
|
||||
We use the following command to build RAJA using the Intel Compiler.
|
||||
```
|
||||
|
||||
33
STD.cmake
Normal file
33
STD.cmake
Normal file
@ -0,0 +1,33 @@
|
||||
|
||||
register_flag_optional(CMAKE_CXX_COMPILER
|
||||
"Any CXX compiler that is supported by CMake detection"
|
||||
"c++")
|
||||
|
||||
register_flag_optional(NVHPC_OFFLOAD
|
||||
"Enable offloading support (via the non-standard `-stdpar`) for the new NVHPC SDK.
|
||||
The values are Nvidia architectures in ccXY format will be passed in via `-gpu=` (e.g `cc70`)
|
||||
|
||||
Possible values are:
|
||||
cc35 - Compile for compute capability 3.5
|
||||
cc50 - Compile for compute capability 5.0
|
||||
cc60 - Compile for compute capability 6.0
|
||||
cc62 - Compile for compute capability 6.2
|
||||
cc70 - Compile for compute capability 7.0
|
||||
cc72 - Compile for compute capability 7.2
|
||||
cc75 - Compile for compute capability 7.5
|
||||
cc80 - Compile for compute capability 8.0
|
||||
ccall - Compile for all supported compute capabilities"
|
||||
"")
|
||||
|
||||
macro(setup)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
if (NVHPC_OFFLOAD)
|
||||
set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD})
|
||||
# propagate flags to linker so that it links with the gpu stuff as well
|
||||
register_append_cxx_flags(ANY ${NVHPC_FLAGS})
|
||||
register_append_link_flags(${NVHPC_FLAGS})
|
||||
endif ()
|
||||
|
||||
|
||||
endmacro()
|
||||
16
STD20.cmake
Normal file
16
STD20.cmake
Normal file
@ -0,0 +1,16 @@
|
||||
|
||||
register_flag_optional(CMAKE_CXX_COMPILER
|
||||
"Any CXX compiler that is supported by CMake detection and supports C++20 Ranges"
|
||||
"c++")
|
||||
|
||||
macro(setup)
|
||||
|
||||
# TODO this needs to eventually be removed when CMake adds proper C++20 support or at least update the flag used here
|
||||
|
||||
# C++ 2a is too new, disable CMake's std flags completely:
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED OFF)
|
||||
unset(CMAKE_CXX_STANDARD) # drop any existing standard we have set by default
|
||||
# and append our own:
|
||||
register_append_cxx_flags(ANY -std=c++2a)
|
||||
endmacro()
|
||||
@ -24,7 +24,7 @@ class STDStream : public Stream<T>
|
||||
T *c;
|
||||
|
||||
public:
|
||||
STDStream(const int, int);
|
||||
STDStream(const int, int) noexcept;
|
||||
~STDStream();
|
||||
|
||||
virtual void copy() override;
|
||||
|
||||
87
SYCL.cmake
Normal file
87
SYCL.cmake
Normal file
@ -0,0 +1,87 @@
|
||||
|
||||
register_flag_optional(CMAKE_CXX_COMPILER
|
||||
"Any CXX compiler that is supported by CMake detection, this is used for host compilation when required by the SYCL compiler"
|
||||
"c++")
|
||||
|
||||
register_flag_required(SYCL_COMPILER
|
||||
"Compile using the specified SYCL compiler implementation
|
||||
Supported values are
|
||||
ONEAPI-DPCPP - dpc++ that is part of an oneAPI Base Toolkit distribution (https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit.html)
|
||||
DPCPP - dpc++ as a standalone compiler (https://github.com/intel/llvm)
|
||||
HIPSYCL - hipSYCL compiler (https://github.com/illuhad/hipSYCL)
|
||||
COMPUTECPP - ComputeCpp compiler (https://developer.codeplay.com/products/computecpp/ce/home)")
|
||||
|
||||
register_flag_optional(SYCL_COMPILER_DIR
|
||||
"Absolute path to the selected SYCL compiler directory, most are packaged differently so set the path according to `SYCL_COMPILER`:
|
||||
ONEAPI-DPCPP - not required but `dpcpp` must be on PATH, load oneAPI as per documentation (i.e `source /opt/intel/oneapi/setvars.sh` first)
|
||||
HIPSYCL|DPCPP|COMPUTECPP - set to the root of the binary distribution that contains at least `bin/`, `include/`, and `lib/`"
|
||||
"")
|
||||
|
||||
register_flag_optional(OpenCL_LIBRARY
|
||||
"[ComputeCpp only] Path to OpenCL library, usually called libOpenCL.so"
|
||||
"${OpenCL_LIBRARY}")
|
||||
|
||||
macro(setup)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
|
||||
if (${SYCL_COMPILER} STREQUAL "HIPSYCL")
|
||||
|
||||
|
||||
set(hipSYCL_DIR ${SYCL_COMPILER_DIR}/lib/cmake/hipSYCL)
|
||||
|
||||
if (NOT EXISTS "${hipSYCL_DIR}")
|
||||
message(WARNING "Falling back to hipSYCL < 0.9.0 CMake structure")
|
||||
set(hipSYCL_DIR ${SYCL_COMPILER_DIR}/lib/cmake)
|
||||
endif ()
|
||||
if (NOT EXISTS "${hipSYCL_DIR}")
|
||||
message(FATAL_ERROR "Can't find the appropriate CMake definitions for hipSYCL")
|
||||
endif ()
|
||||
|
||||
# register_definitions(_GLIBCXX_USE_CXX11_ABI=0)
|
||||
find_package(hipSYCL CONFIG REQUIRED)
|
||||
message(STATUS "ok")
|
||||
|
||||
elseif (${SYCL_COMPILER} STREQUAL "COMPUTECPP")
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)
|
||||
set(ComputeCpp_DIR ${SYCL_COMPILER_DIR})
|
||||
|
||||
# don't point to the CL dir as the imports already have the CL prefix
|
||||
set(OpenCL_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/CL")
|
||||
|
||||
register_definitions(CL_TARGET_OPENCL_VERSION=220 _GLIBCXX_USE_CXX11_ABI=0)
|
||||
# ComputeCpp needs OpenCL
|
||||
find_package(ComputeCpp REQUIRED)
|
||||
|
||||
# this must come after FindComputeCpp (!)
|
||||
set(COMPUTECPP_USER_FLAGS -O3 -no-serial-memop)
|
||||
|
||||
elseif (${SYCL_COMPILER} STREQUAL "DPCPP")
|
||||
set(CMAKE_CXX_COMPILER ${SYCL_COMPILER_DIR}/bin/clang++)
|
||||
include_directories(${SYCL_COMPILER_DIR}/include/sycl)
|
||||
register_definitions(CL_TARGET_OPENCL_VERSION=220)
|
||||
register_append_cxx_flags(ANY -fsycl)
|
||||
register_append_link_flags(-fsycl)
|
||||
elseif (${SYCL_COMPILER} STREQUAL "ONEAPI-DPCPP")
|
||||
set(CMAKE_CXX_COMPILER dpcpp)
|
||||
register_definitions(CL_TARGET_OPENCL_VERSION=220)
|
||||
else ()
|
||||
message(FATAL_ERROR "SYCL_COMPILER=${SYCL_COMPILER} is unsupported")
|
||||
endif ()
|
||||
|
||||
endmacro()
|
||||
|
||||
|
||||
macro(setup_target NAME)
|
||||
if (
|
||||
(${SYCL_COMPILER} STREQUAL "COMPUTECPP") OR
|
||||
(${SYCL_COMPILER} STREQUAL "HIPSYCL"))
|
||||
# so ComputeCpp and hipSYCL has this weird (and bad) CMake usage where they append their
|
||||
# own custom integration header flags AFTER the target has been specified
|
||||
# hence this macro here
|
||||
add_sycl_to_target(
|
||||
TARGET ${NAME}
|
||||
SOURCES ${IMPL_SOURCES})
|
||||
endif ()
|
||||
endmacro()
|
||||
375
ci-prepare-bionic.sh
Executable file
375
ci-prepare-bionic.sh
Executable file
@ -0,0 +1,375 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -eu
|
||||
|
||||
WORK_DIR="${1:-.}"
|
||||
MODE="${2:-SETUP}"
|
||||
PARALLEL="${3:-false}"
|
||||
|
||||
FORCE_DOWNLOAD=false
|
||||
|
||||
if [ "$_" = "$0" ]; then
|
||||
echo "This script must be sourced for the exports to work!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
case "$MODE" in
|
||||
SETUP)
|
||||
SETUP=true
|
||||
echo "Preparing env with setup..."
|
||||
;;
|
||||
VARS)
|
||||
SETUP=false
|
||||
echo "Preparing env without setup..."
|
||||
;;
|
||||
*)
|
||||
echo "Bad option"
|
||||
echo "$0 <work_dir:dir> <VARS|SETUP> <parallel:bool>"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
mkdir -p "$WORK_DIR"
|
||||
PREV_DIR="$PWD"
|
||||
cd "$WORK_DIR" || exit 1
|
||||
|
||||
export_var() {
|
||||
export "$1"="$2"
|
||||
# see
|
||||
# https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#setting-an-environment-variable
|
||||
if [ "${GITHUB_ACTIONS:-false}" = true ]; then
|
||||
echo "$1=$2" >>"$GITHUB_ENV"
|
||||
fi
|
||||
}
|
||||
|
||||
check_size() {
|
||||
if [ "$SETUP" = true ]; then
|
||||
echo "Used space for $PWD"
|
||||
du -sh .
|
||||
df -h .
|
||||
fi
|
||||
}
|
||||
|
||||
get_and_install_deb() {
|
||||
|
||||
local name="$1"
|
||||
local install_dir="$2"
|
||||
local pkg_url="$3"
|
||||
shift
|
||||
local wget_args=("$@")
|
||||
|
||||
local pkg_name="$name.deb"
|
||||
|
||||
if [ "$SETUP" = true ]; then
|
||||
if [ ! -f "$pkg_name" ] || [ "$FORCE_DOWNLOAD" = true ]; then
|
||||
echo "$pkg_name not found, downloading"
|
||||
rm -f "$pkg_name"
|
||||
# shellcheck disable=SC2086
|
||||
wget -q --show-progress --progress=bar:force:noscroll "${wget_args[@]}" "$pkg_url" -O "$pkg_name"
|
||||
fi
|
||||
# rm -rf "$install_dir"
|
||||
echo "Preparing to install $pkg_name locally to $install_dir ..."
|
||||
dpkg-deb -x "$pkg_name" "$install_dir"
|
||||
echo "$pkg_name installed, deleting $pkg_name ..."
|
||||
rm -f "$pkg_name" # delete for space
|
||||
fi
|
||||
|
||||
}
|
||||
|
||||
get() {
|
||||
local name="$1"
|
||||
local pkg_url="$2"
|
||||
if [ "$SETUP" = true ]; then
|
||||
if [ ! -f "$name" ] || [ "$FORCE_DOWNLOAD" = true ]; then
|
||||
echo "$name not found, downloading..."
|
||||
wget -q --show-progress --progress=bar:force:noscroll "$pkg_url" -O "$name"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
get_and_untar() {
|
||||
local name="$1"
|
||||
local pkg_url="$2"
|
||||
if [ "$SETUP" = true ]; then
|
||||
if [ ! -f "$name" ] || [ "$FORCE_DOWNLOAD" = true ]; then
|
||||
echo "$name not found, downloading..."
|
||||
wget -q --show-progress --progress=bar:force:noscroll "$pkg_url" -O "$name"
|
||||
fi
|
||||
echo "Preparing to extract $name ..."
|
||||
tar -xf "$name"
|
||||
echo "$name extracted, deleting archive ..."
|
||||
rm -f "$name" # delete for space
|
||||
fi
|
||||
}
|
||||
|
||||
verify_bin_exists() {
|
||||
if [ ! -f "$1" ]; then
|
||||
echo "[FAIL] $1 does not exist or is not a file!"
|
||||
exit 1
|
||||
else echo "[OK! ] $1"; fi
|
||||
}
|
||||
|
||||
verify_dir_exists() {
|
||||
if [ ! -d "$1" ]; then
|
||||
echo "[FAIL] $1 does not exist or is not a directory!"
|
||||
exit 1
|
||||
else echo "[OK! ] $1"; fi
|
||||
}
|
||||
|
||||
setup_aocc() {
|
||||
echo "Preparing AOCC"
|
||||
|
||||
local aocc_ver="2.3.0"
|
||||
local tarball="aocc-$aocc_ver.tar.xz"
|
||||
# XXX it's actually XZ compressed, so it should be tar.xz
|
||||
local AOCC_URL="http://developer.amd.com/wordpress/media/files/aocc-compiler-2.3.0.tar"
|
||||
# local AOCC_URL="http://localhost:8000/aocc-compiler-2.3.0.tar"
|
||||
|
||||
get_and_untar "$tarball" "$AOCC_URL"
|
||||
export_var AOCC_CXX "$PWD/aocc-compiler-$aocc_ver/bin/clang++"
|
||||
verify_bin_exists "$AOCC_CXX"
|
||||
"$AOCC_CXX" --version
|
||||
check_size
|
||||
}
|
||||
|
||||
setup_nvhpc() {
|
||||
echo "Preparing Nvidia HPC SDK"
|
||||
|
||||
local tarball="nvhpc.tar.gz"
|
||||
# local url="http://localhost:8000/nvhpc_2021_212_Linux_x86_64_cuda_11.2.tar.gz"
|
||||
local url="https://developer.download.nvidia.com/hpc-sdk/21.2/nvhpc_2021_212_Linux_x86_64_cuda_11.2.tar.gz"
|
||||
|
||||
get_and_untar "$tarball" "$url"
|
||||
|
||||
local sdk_dir="$PWD/nvhpc_2021_212_Linux_x86_64_cuda_11.2/install_components/Linux_x86_64/21.2"
|
||||
local bin_dir="$sdk_dir/compilers/bin"
|
||||
"$bin_dir/makelocalrc" "$bin_dir" -x
|
||||
|
||||
export_var NVHPC_NVCXX "$bin_dir/nvc++"
|
||||
export_var NVHPC_NVCC "$sdk_dir/cuda/11.2/bin/nvcc"
|
||||
export_var NVHPC_CUDA_DIR "$sdk_dir/cuda/11.2"
|
||||
echo "Installed CUDA versions:"
|
||||
ls "$sdk_dir/cuda"
|
||||
verify_bin_exists "$NVHPC_NVCXX"
|
||||
verify_bin_exists "$NVHPC_NVCC"
|
||||
"$NVHPC_NVCXX" --version
|
||||
"$NVHPC_NVCC" --version
|
||||
check_size
|
||||
}
|
||||
|
||||
setup_aomp() {
|
||||
echo "Preparing AOMP"
|
||||
local AOMP_URL="https://github.com/ROCm-Developer-Tools/aomp/releases/download/rel_11.12-0/aomp_Ubuntu1804_11.12-0_amd64.deb"
|
||||
# local AOMP_URL="http://0.0.0.0:8000/aomp_Ubuntu1804_11.12-0_amd64.deb"
|
||||
get_and_install_deb "aomp" "aomp" "$AOMP_URL"
|
||||
|
||||
export_var AOMP_CXX "$PWD/aomp/usr/lib/aomp/bin/clang++"
|
||||
verify_bin_exists "$AOMP_CXX"
|
||||
"$AOMP_CXX" --version
|
||||
check_size
|
||||
}
|
||||
|
||||
setup_oclcpu() {
|
||||
echo "Preparing Intel CPU OpenCL runtime"
|
||||
local tarball="oclcpuexp.tar.gz"
|
||||
local url="https://github.com/intel/llvm/releases/download/2020-12/oclcpuexp-2020.11.11.0.04_rel.tar.gz"
|
||||
# local url="http://localhost:8000/oclcpuexp-2020.11.11.0.04_rel.tar.gz"
|
||||
get_and_untar "$tarball" "$url"
|
||||
export_var OCL_LIB "$PWD/x64/libOpenCL.so"
|
||||
verify_bin_exists "$OCL_LIB"
|
||||
check_size
|
||||
}
|
||||
|
||||
setup_kokkos() {
|
||||
echo "Preparing Kokkos"
|
||||
local kokkos_ver="3.3.01"
|
||||
local tarball="kokkos-$kokkos_ver.tar.gz"
|
||||
|
||||
local url="https://github.com/kokkos/kokkos/archive/$kokkos_ver.tar.gz"
|
||||
# local url="http://localhost:8000/$kokkos_ver.tar.gz"
|
||||
|
||||
get_and_untar "$tarball" "$url"
|
||||
export_var KOKKOS_SRC "$PWD/kokkos-$kokkos_ver"
|
||||
verify_dir_exists "$KOKKOS_SRC"
|
||||
check_size
|
||||
}
|
||||
|
||||
setup_raja() {
|
||||
echo "Preparing RAJA"
|
||||
local raja_ver="0.13.0"
|
||||
local tarball="raja-$raja_ver.tar.gz"
|
||||
|
||||
local url="https://github.com/LLNL/RAJA/releases/download/v0.13.0/RAJA-v$raja_ver.tar.gz"
|
||||
# local url="http://localhost:8000/RAJA-v$raja_ver.tar.gz"
|
||||
|
||||
get_and_untar "$tarball" "$url"
|
||||
export_var RAJA_SRC "$PWD/RAJA-v$raja_ver"
|
||||
verify_dir_exists "$RAJA_SRC"
|
||||
check_size
|
||||
}
|
||||
|
||||
setup_clang_gcc() {
|
||||
|
||||
echo "deb http://archive.ubuntu.com/ubuntu focal main universe" | sudo tee -a /etc/apt/sources.list
|
||||
|
||||
sudo apt-get update -qq
|
||||
sudo apt-get install -y -qq gcc-10-offload-nvptx gcc-10-offload-amdgcn libtbb2 libtbb-dev g++-10
|
||||
|
||||
export_var GCC_CXX "$(which g++-10)"
|
||||
verify_bin_exists "$GCC_CXX"
|
||||
"$GCC_CXX" --version
|
||||
|
||||
export_var GCC_STD_PAR_LIB "tbb"
|
||||
export_var GCC_OMP_OFFLOAD_AMD true
|
||||
export_var GCC_OMP_OFFLOAD_NVIDIA true
|
||||
|
||||
clang++ --version
|
||||
export_var CLANG_CXX "$(which clang++)"
|
||||
verify_bin_exists "$CLANG_CXX"
|
||||
"$CLANG_CXX" --version
|
||||
|
||||
export_var CLANG_STD_PAR_LIB "tbb"
|
||||
export_var CLANG_OMP_OFFLOAD_AMD false
|
||||
export_var CLANG_OMP_OFFLOAD_NVIDIA false
|
||||
check_size
|
||||
|
||||
}
|
||||
|
||||
setup_rocm() {
|
||||
wget -q -O - "https://repo.radeon.com/rocm/rocm.gpg.key" | sudo apt-key add -
|
||||
echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/debian/ xenial main' | sudo tee /etc/apt/sources.list.d/rocm.list
|
||||
sudo apt-get update -qq
|
||||
sudo apt-get install -y -qq rocm-dev
|
||||
# AMD needs this rocm_path thing exported...
|
||||
export_var ROCM_PATH "/opt/rocm-4.1.0"
|
||||
export_var HIP_CXX "$ROCM_PATH/bin/hipcc"
|
||||
verify_bin_exists "$HIP_CXX"
|
||||
"$HIP_CXX" --version
|
||||
check_size
|
||||
}
|
||||
|
||||
setup_dpcpp() {
|
||||
|
||||
local nightly="20210106"
|
||||
local tarball="dpcpp-$nightly.tar.gz"
|
||||
|
||||
local url="https://github.com/intel/llvm/releases/download/sycl-nightly/$nightly/dpcpp-compiler.tar.gz"
|
||||
# local url="http://localhost:8000/dpcpp-compiler.tar.gz"
|
||||
|
||||
get_and_untar "$tarball" "$url"
|
||||
|
||||
export_var DPCPP_DIR "$PWD/dpcpp_compiler/"
|
||||
verify_dir_exists "$DPCPP_DIR"
|
||||
"$DPCPP_DIR/bin/clang++" --version
|
||||
check_size
|
||||
}
|
||||
|
||||
setup_hipsycl() {
|
||||
|
||||
sudo apt-get install -y -qq libboost-fiber-dev libboost-context-dev
|
||||
local hipsycl_ver="0.9.0"
|
||||
local tarball="v$hipsycl_ver.tar.gz"
|
||||
local install_dir="$PWD/hipsycl_dist_$hipsycl_ver"
|
||||
|
||||
local url="https://github.com/illuhad/hipSYCL/archive/v$hipsycl_ver.tar.gz"
|
||||
# local url="http://localhost:8000/hipSYCL-$hipsycl_ver.tar.gz"
|
||||
|
||||
get_and_untar "$tarball" "$url"
|
||||
|
||||
if [ "$SETUP" = true ]; then
|
||||
local src="$PWD/hipSYCL-$hipsycl_ver"
|
||||
rm -rf "$src/build"
|
||||
rm -rf "$install_dir"
|
||||
cmake "-B$src/build" "-H$src" \
|
||||
-DCMAKE_C_COMPILER="$(which gcc-10)" \
|
||||
-DCMAKE_CXX_COMPILER="$(which g++-10)" \
|
||||
-DCMAKE_INSTALL_PREFIX="$install_dir" \
|
||||
-DWITH_ROCM_BACKEND=OFF \
|
||||
-DWITH_CUDA_BACKEND=OFF \
|
||||
-DWITH_CPU_BACKEND=ON
|
||||
cmake --build "$src/build" --target install -j "$(nproc)"
|
||||
fi
|
||||
|
||||
export_var HIPSYCL_DIR "$install_dir"
|
||||
verify_dir_exists "$HIPSYCL_DIR"
|
||||
# note: this will forward --version to the default compiler so it won't say anything about hipsycl
|
||||
"$HIPSYCL_DIR/bin/syclcc-clang" --version
|
||||
check_size
|
||||
}
|
||||
|
||||
setup_computecpp() {
|
||||
echo "TODO ComputeCpp requires registration+login to download"
|
||||
}
|
||||
|
||||
if [ "${GITHUB_ACTIONS:-false}" = true ]; then
|
||||
echo "Running in GitHub Actions, defaulting to special export"
|
||||
TERM=xterm
|
||||
export TERM=xterm
|
||||
if [ "$SETUP" = true ]; then
|
||||
echo "Deleting extra packages for space in 5 seconds..."
|
||||
sleep 5
|
||||
echo "Starting apt-get remove:"
|
||||
sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel
|
||||
sudo apt-get autoremove -y
|
||||
check_size
|
||||
fi
|
||||
else
|
||||
echo "Running locally, defaulting to standard export"
|
||||
fi
|
||||
|
||||
setup_cmake() {
|
||||
|
||||
echo "Preparing CMake"
|
||||
|
||||
local cmake_release="https://github.com/Kitware/CMake/releases/download"
|
||||
|
||||
get "cmake-3.13.sh" "$cmake_release/v3.13.4/cmake-3.13.4-Linux-x86_64.sh"
|
||||
chmod +x "./cmake-3.13.sh" && sh "./cmake-3.13.sh" --skip-license --include-subdir
|
||||
export_var CMAKE_3_13_BIN "$PWD/cmake-3.13.4-Linux-x86_64/bin/cmake"
|
||||
verify_bin_exists "$CMAKE_3_13_BIN"
|
||||
"$CMAKE_3_13_BIN" --version
|
||||
|
||||
get "cmake-3.15.sh" "$cmake_release/v3.15.7/cmake-3.15.7-Linux-x86_64.sh"
|
||||
chmod +x "./cmake-3.15.sh" && "./cmake-3.15.sh" --skip-license --include-subdir
|
||||
export_var CMAKE_3_15_BIN "$PWD/cmake-3.15.7-Linux-x86_64/bin/cmake"
|
||||
verify_bin_exists "$CMAKE_3_15_BIN"
|
||||
"$CMAKE_3_15_BIN" --version
|
||||
|
||||
get "cmake-3.18.sh" "$cmake_release/v3.18.6/cmake-3.18.6-Linux-x86_64.sh"
|
||||
chmod +x "./cmake-3.18.sh" && "./cmake-3.18.sh" --skip-license --include-subdir
|
||||
export_var CMAKE_3_18_BIN "$PWD/cmake-3.18.6-Linux-x86_64/bin/cmake"
|
||||
verify_bin_exists "$CMAKE_3_18_BIN"
|
||||
"$CMAKE_3_18_BIN" --version
|
||||
|
||||
check_size
|
||||
|
||||
}
|
||||
|
||||
if [ "$PARALLEL" = true ]; then
|
||||
(setup_clang_gcc && setup_rocm && setup_hipsycl) & # these need apt so run sequentially
|
||||
setup_cmake &
|
||||
setup_oclcpu &
|
||||
setup_aocc &
|
||||
setup_nvhpc &
|
||||
setup_aomp &
|
||||
setup_dpcpp &
|
||||
setup_kokkos &
|
||||
setup_raja &
|
||||
wait
|
||||
else
|
||||
setup_cmake
|
||||
setup_aocc
|
||||
setup_oclcpu
|
||||
setup_nvhpc
|
||||
setup_aomp
|
||||
setup_dpcpp
|
||||
setup_kokkos
|
||||
setup_raja
|
||||
# these need apt
|
||||
setup_clang_gcc
|
||||
setup_rocm
|
||||
setup_hipsycl
|
||||
fi
|
||||
|
||||
echo "Done!"
|
||||
cd "$PREV_DIR" || exit 1
|
||||
282
ci-test-compile.sh
Executable file
282
ci-test-compile.sh
Executable file
@ -0,0 +1,282 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -eu
|
||||
|
||||
# prevent ccache from caching anything for system compilers
|
||||
export CCACHE_DISABLE=1
|
||||
|
||||
BUILD_DIR=${1:-build}
|
||||
COMPILER=${2:-all}
|
||||
MODEL=${3:-all}
|
||||
CMAKE_BIN=${4}
|
||||
|
||||
LOG_DIR="$BUILD_DIR"
|
||||
|
||||
mkdir -p "$LOG_DIR"
|
||||
|
||||
if [ "${GITHUB_ACTIONS:-false}" = true ]; then
|
||||
echo "Running in GitHub Actions, setting TERM..."
|
||||
TERM=xterm
|
||||
export TERM=xterm
|
||||
fi
|
||||
|
||||
function_exists() {
|
||||
declare -f -F "$1" >/dev/null
|
||||
return $?
|
||||
}
|
||||
|
||||
run_build() {
|
||||
local key="$1"
|
||||
local grep_kw="$2"
|
||||
local model="$3"
|
||||
local flags="$4"
|
||||
|
||||
if [ "$MODEL" != "all" ] && [ "$MODEL" != "$model" ]; then
|
||||
echo "Skipping -DMODEL=$model $flags"
|
||||
return 0
|
||||
fi
|
||||
|
||||
local log="$LOG_DIR/${model}_${key}.log"
|
||||
rm -f "$log"
|
||||
touch "$log"
|
||||
|
||||
local build="$BUILD_DIR/${model}_${key}"
|
||||
|
||||
rm -rf "$build"
|
||||
set +e
|
||||
|
||||
# shellcheck disable=SC2086
|
||||
"$CMAKE_BIN" -B"$build" -H. \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=ON \
|
||||
-DMODEL="$model" $flags &>>"$log"
|
||||
local model_lower=$(echo "$model" | awk '{print tolower($0)}')
|
||||
|
||||
local cmake_code=$?
|
||||
|
||||
"$CMAKE_BIN" --build "$build" -j "$(nproc)" &>>"$log"
|
||||
local cmake_code=$?
|
||||
set -e
|
||||
|
||||
local bin="./$build/$model_lower-stream"
|
||||
echo "Checking for final executable: $bin"
|
||||
if [[ -f "$bin" ]]; then
|
||||
echo "$(tput setaf 2)[PASS!]($model->$build)$(tput sgr0): -DMODEL=$model $flags"
|
||||
# shellcheck disable=SC2002
|
||||
cat "$log" | sed '/^--/d' | grep -i "/bin/nvcc" | sed 's/^/ /'
|
||||
cat "$log" | sed '/^--/d' | grep -i "$grep_kw" | sed 's/^/ /'
|
||||
cat "$log" | sed '/^--/d' | grep -i "warning" | sed "s/.*/ $(tput setaf 3)&$(tput sgr0)/"
|
||||
else
|
||||
echo "$(tput setaf 1)[FAIL!]($model->$build)$(tput sgr0): -DMODEL=$model $flags"
|
||||
echo " $(tput setaf 1)CMake exited with code $cmake_code, see full build log at $log, reproduced below:$(tput sgr0)"
|
||||
cat "$log"
|
||||
exit 1
|
||||
fi
|
||||
echo " $(tput setaf 4)$(file "$bin")$(tput sgr0)"
|
||||
}
|
||||
|
||||
###
|
||||
#KOKKOS_SRC="/home/tom/Downloads/kokkos-3.3.00"
|
||||
#RAJA_SRC="/home/tom/Downloads/RAJA-v0.13.0"
|
||||
#
|
||||
#GCC_CXX="/usr/bin/g++"
|
||||
#CLANG_CXX="/usr/bin/clang++"
|
||||
#
|
||||
#NVSDK="/home/tom/Downloads/nvhpc_2021_212_Linux_x86_64_cuda_11.2/install_components/Linux_x86_64/21.2/"
|
||||
#NVHPC_NVCXX="$NVSDK/compilers/bin/nvc++"
|
||||
#NVHPC_NVCC="$NVSDK/cuda/11.2/bin/nvcc"
|
||||
#NVHPC_CUDA_DIR="$NVSDK/cuda/11.2"
|
||||
#"$NVSDK/compilers/bin/makelocalrc" "$NVSDK/compilers/bin/" -x
|
||||
#
|
||||
#AOCC_CXX="/opt/AMD/aocc-compiler-2.3.0/bin/clang++"
|
||||
#AOMP_CXX="/usr/lib/aomp/bin/clang++"
|
||||
#OCL_LIB="/home/tom/Downloads/oclcpuexp-2020.11.11.0.04_rel/x64/libOpenCL.so"
|
||||
#
|
||||
## AMD needs this rocm_path thing exported...
|
||||
#export ROCM_PATH="/opt/rocm-4.0.0"
|
||||
#HIP_CXX="/opt/rocm-4.0.0/bin/hipcc"
|
||||
#COMPUTECPP_DIR="/home/tom/Desktop/computecpp_archive/ComputeCpp-CE-2.3.0-x86_64-linux-gnu"
|
||||
#DPCPP_DIR="/home/tom/Downloads/dpcpp_compiler"
|
||||
#HIPSYCL_DIR="/opt/hipsycl/cff515c/"
|
||||
#
|
||||
#ICPX_CXX="/opt/intel/oneapi/compiler/2021.1.2/linux/bin/icpx"
|
||||
#ICPC_CXX="/opt/intel/oneapi/compiler/2021.1.2/linux/bin/intel64/icpc"
|
||||
#
|
||||
#GCC_STD_PAR_LIB="tbb"
|
||||
#CLANG_STD_PAR_LIB="tbb"
|
||||
#GCC_OMP_OFFLOAD_AMD=false
|
||||
#GCC_OMP_OFFLOAD_NVIDIA=true
|
||||
#CLANG_OMP_OFFLOAD_AMD=false
|
||||
#CLANG_OMP_OFFLOAD_NVIDIA=false
|
||||
###
|
||||
|
||||
AMD_ARCH="gfx_903"
|
||||
NV_ARCH="sm_70"
|
||||
NV_ARCH_CCXY="cuda11.2,cc80"
|
||||
|
||||
build_gcc() {
|
||||
local name="gcc_build"
|
||||
local cxx="-DCMAKE_CXX_COMPILER=${GCC_CXX:?}"
|
||||
|
||||
run_build $name "${GCC_CXX:?}" OMP "$cxx"
|
||||
if [ "$MODEL" = "all" ] || [ "$MODEL" = "OMP" ]; then
|
||||
# sanity check that it at least runs
|
||||
echo "Sanity checking GCC OMP build..."
|
||||
"./$BUILD_DIR/OMP_$name/omp-stream" -s 1048576 -n 10
|
||||
fi
|
||||
|
||||
# some distributions like Ubuntu bionic implements std par with TBB, so conditionally link it here
|
||||
run_build $name "${GCC_CXX:?}" STD "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}"
|
||||
run_build $name "${GCC_CXX:?}" STD20 "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}"
|
||||
|
||||
if [ "${GCC_OMP_OFFLOAD_AMD:-false}" != "false" ]; then
|
||||
run_build "amd_$name" "${GCC_CXX:?}" ACC "$cxx -DCXX_EXTRA_FLAGS=-foffload=amdgcn-amdhsa"
|
||||
run_build "amd_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=AMD:$AMD_ARCH"
|
||||
fi
|
||||
|
||||
if [ "${GCC_OMP_OFFLOAD_NVIDIA:-false}" != "false" ]; then
|
||||
run_build "nvidia_$name" "${GCC_CXX:?}" ACC "$cxx -DCXX_EXTRA_FLAGS=-foffload=nvptx-none"
|
||||
run_build "nvidia_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH"
|
||||
fi
|
||||
|
||||
run_build $name "${GCC_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH"
|
||||
run_build $name "${GCC_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED"
|
||||
run_build $name "${GCC_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT"
|
||||
# run_build $name "${CC_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_CUDA=ON"
|
||||
run_build "cuda_$name" "${GCC_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
|
||||
run_build $name "${GCC_CXX:?}" OCL "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
|
||||
run_build $name "${GCC_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}"
|
||||
run_build "cuda_$name" "${GCC_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} \
|
||||
-DENABLE_CUDA=ON \
|
||||
-DTARGET=NVIDIA \
|
||||
-DCUDA_TOOLKIT_ROOT_DIR=${NVHPC_CUDA_DIR:?} \
|
||||
-DCUDA_ARCH=$NV_ARCH"
|
||||
|
||||
}
|
||||
|
||||
build_clang() {
|
||||
local name="clang_build"
|
||||
local cxx="-DCMAKE_CXX_COMPILER=${CLANG_CXX:?}"
|
||||
run_build $name "${CLANG_CXX:?}" OMP "$cxx"
|
||||
|
||||
if [ "${CLANG_OMP_OFFLOAD_AMD:-false}" != "false" ]; then
|
||||
run_build "amd_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=AMD:$AMD_ARCH"
|
||||
fi
|
||||
|
||||
if [ "${CLANG_OMP_OFFLOAD_NVIDIA:-false}" != "false" ]; then
|
||||
run_build "nvidia_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH"
|
||||
fi
|
||||
|
||||
run_build $name "${CLANG_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH"
|
||||
run_build $name "${CLANG_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED"
|
||||
run_build $name "${CLANG_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT"
|
||||
run_build $name "${CLANG_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
|
||||
run_build $name "${CLANG_CXX:?}" OCL "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
|
||||
run_build $name "${CLANG_CXX:?}" STD "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}"
|
||||
# run_build $name "${LANG_CXX:?}" STD20 "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" # not yet supported
|
||||
run_build $name "${CLANG_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}"
|
||||
# no clang /w RAJA+cuda because it needs nvcc which needs gcc
|
||||
}
|
||||
|
||||
build_nvhpc() {
|
||||
local name="nvhpc_build"
|
||||
local cxx="-DCMAKE_CXX_COMPILER=${NVHPC_NVCXX:?}"
|
||||
run_build $name "${NVHPC_NVCXX:?}" STD "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY"
|
||||
run_build $name "${NVHPC_NVCXX:?}" ACC "$cxx -DTARGET_DEVICE=gpu -DTARGET_PROCESSOR=px -DCUDA_ARCH=$NV_ARCH_CCXY"
|
||||
run_build $name "${NVHPC_NVCXX:?}" ACC "$cxx -DTARGET_DEVICE=multicore -DTARGET_PROCESSOR=zen"
|
||||
}
|
||||
|
||||
build_aocc() {
|
||||
run_build aocc_build "${AOCC_CXX:?}" OMP "-DCMAKE_CXX_COMPILER=${AOCC_CXX:?}"
|
||||
}
|
||||
|
||||
build_aomp() {
|
||||
run_build aomp_amd_build "${AOMP_CXX:?}" OMP "-DCMAKE_CXX_COMPILER=${AOMP_CXX:?} -DOFFLOAD=AMD:gfx906"
|
||||
#run_build aomp_nvidia_build "-DCMAKE_CXX_COMPILER=${AOMP_CXX:?} -DOFFLOAD=NVIDIA:$NV_ARCH"
|
||||
}
|
||||
|
||||
build_hip() {
|
||||
run_build hip_build "${HIP_CXX:?}" HIP "-DCMAKE_CXX_COMPILER=${HIP_CXX:?}"
|
||||
}
|
||||
|
||||
build_icpx() {
|
||||
# clang derived
|
||||
set +u
|
||||
source /opt/intel/oneapi/setvars.sh -force || true
|
||||
set -u
|
||||
run_build intel_build "${ICPX_CXX:?}" OMP "-DCMAKE_CXX_COMPILER=${ICPX_CXX:?} -DOFFLOAD=INTEL"
|
||||
}
|
||||
|
||||
build_icpc() {
|
||||
# icc/icpc
|
||||
set +u
|
||||
source /opt/intel/oneapi/setvars.sh -force || true
|
||||
set -u
|
||||
local name="intel_build"
|
||||
local cxx="-DCMAKE_CXX_COMPILER=${ICPC_CXX:?}"
|
||||
run_build $name "${ICPC_CXX:?}" OMP "$cxx"
|
||||
run_build $name "${ICPC_CXX:?}" OCL "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
|
||||
run_build $name "${ICPC_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}"
|
||||
run_build $name "${ICPC_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
|
||||
}
|
||||
|
||||
build_computecpp() {
|
||||
run_build computecpp_build "compute++" SYCL "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \
|
||||
-DSYCL_COMPILER=COMPUTECPP \
|
||||
-DSYCL_COMPILER_DIR=${COMPUTECPP_DIR:?} \
|
||||
-DOpenCL_LIBRARY=${OCL_LIB:?}"
|
||||
}
|
||||
|
||||
build_dpcpp() {
|
||||
run_build intel_build "${DPCPP_DIR:?}" SYCL "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \
|
||||
-DSYCL_COMPILER=DPCPP \
|
||||
-DSYCL_COMPILER_DIR=${DPCPP_DIR:?}"
|
||||
|
||||
# for oneAPI BaseKit:
|
||||
# source /opt/intel/oneapi/setvars.sh -force
|
||||
# run_build intel_build "dpcpp" SYCL "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} -DSYCL_COMPILER=ONEAPI-DPCPP"
|
||||
}
|
||||
|
||||
build_hipsycl() {
|
||||
run_build hipsycl_build "syclcc" SYCL "
|
||||
-DSYCL_COMPILER=HIPSYCL \
|
||||
-DSYCL_COMPILER_DIR=${HIPSYCL_DIR:?}"
|
||||
}
|
||||
|
||||
echo "Test compiling with ${COMPILER} CXX for ${MODEL} model"
|
||||
"$CMAKE_BIN" --version
|
||||
|
||||
case "$COMPILER" in
|
||||
gcc) build_gcc ;;
|
||||
clang) build_clang ;;
|
||||
nvhpc) build_nvhpc ;;
|
||||
aocc) build_aocc ;;
|
||||
aomp) build_aomp ;;
|
||||
hip) build_hip ;;
|
||||
dpcpp) build_dpcpp ;;
|
||||
hipsycl) build_hipsycl ;;
|
||||
|
||||
# XXX below are local only; licence or very large download required, candidate for local runner
|
||||
computecpp) build_computecpp ;;
|
||||
icpx) build_icpx ;;
|
||||
icpc) build_icpc ;;
|
||||
|
||||
all)
|
||||
build_gcc
|
||||
build_clang
|
||||
build_nvhpc
|
||||
build_aocc
|
||||
build_aomp
|
||||
build_hip
|
||||
build_dpcpp
|
||||
build_hipsycl
|
||||
|
||||
build_computecpp
|
||||
build_icpx
|
||||
build_icpc
|
||||
|
||||
;;
|
||||
*)
|
||||
echo "Unknown $COMPILER, use ALL to compile with all supported compilers"
|
||||
;;
|
||||
esac
|
||||
65
cmake/Modules/ComputeCppCompilerChecks.cmake
Normal file
65
cmake/Modules/ComputeCppCompilerChecks.cmake
Normal file
@ -0,0 +1,65 @@
|
||||
cmake_minimum_required(VERSION 3.4.3)
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8)
|
||||
message(FATAL_ERROR "host compiler - gcc version must be > 4.8")
|
||||
endif()
|
||||
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
||||
if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.6)
|
||||
message(FATAL_ERROR "host compiler - clang version must be > 3.6")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
set(ComputeCpp_STL_CHECK_SRC __STL_check)
|
||||
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp
|
||||
"#include <CL/sycl.hpp> \n"
|
||||
"int main() { return 0; }\n")
|
||||
set(_stl_test_command ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE}
|
||||
-sycl
|
||||
${COMPUTECPP_DEVICE_COMPILER_FLAGS}
|
||||
-isystem ${ComputeCpp_INCLUDE_DIRS}
|
||||
-isystem ${OpenCL_INCLUDE_DIRS}
|
||||
-o ${ComputeCpp_STL_CHECK_SRC}.sycl
|
||||
-c ${ComputeCpp_STL_CHECK_SRC}.cpp)
|
||||
execute_process(
|
||||
COMMAND ${_stl_test_command}
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
|
||||
RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT
|
||||
ERROR_QUIET
|
||||
OUTPUT_QUIET)
|
||||
if(NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0)
|
||||
# Try disabling compiler version checks
|
||||
execute_process(
|
||||
COMMAND ${_stl_test_command}
|
||||
-D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
|
||||
RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT
|
||||
ERROR_QUIET
|
||||
OUTPUT_QUIET)
|
||||
if(NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0)
|
||||
# Try again with __CUDACC__ and _HAS_CONDITIONAL_EXPLICIT=0. This relaxes the restritions in the MSVC headers
|
||||
execute_process(
|
||||
COMMAND ${_stl_test_command}
|
||||
-D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH
|
||||
-D_HAS_CONDITIONAL_EXPLICIT=0
|
||||
-D__CUDACC__
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
|
||||
RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT
|
||||
ERROR_QUIET
|
||||
OUTPUT_QUIET)
|
||||
if(NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0)
|
||||
message(FATAL_ERROR "compute++ cannot consume hosted STL headers. This means that compute++ can't \
|
||||
compile a simple program in this platform and will fail when used in this system.")
|
||||
else()
|
||||
list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH
|
||||
-D_HAS_CONDITIONAL_EXPLICIT=0
|
||||
-D__CUDACC__)
|
||||
endif()
|
||||
else()
|
||||
list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH)
|
||||
endif()
|
||||
endif()
|
||||
file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp
|
||||
${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp.sycl)
|
||||
endif(MSVC)
|
||||
18
cmake/Modules/ComputeCppIRMap.cmake
Normal file
18
cmake/Modules/ComputeCppIRMap.cmake
Normal file
@ -0,0 +1,18 @@
|
||||
cmake_minimum_required(VERSION 3.4.3)
|
||||
|
||||
# These should match the types of IR output by compute++
|
||||
set(IR_MAP_spir bc)
|
||||
set(IR_MAP_spir64 bc)
|
||||
set(IR_MAP_spir32 bc)
|
||||
set(IR_MAP_spirv spv)
|
||||
set(IR_MAP_spirv64 spv)
|
||||
set(IR_MAP_spirv32 spv)
|
||||
set(IR_MAP_aorta-x86_64 o)
|
||||
set(IR_MAP_aorta-aarch64 o)
|
||||
set(IR_MAP_aorta-rcar-cve o)
|
||||
set(IR_MAP_custom-spir64 bc)
|
||||
set(IR_MAP_custom-spir32 bc)
|
||||
set(IR_MAP_custom-spirv64 spv)
|
||||
set(IR_MAP_custom-spirv32 spv)
|
||||
set(IR_MAP_ptx64 s)
|
||||
set(IR_MAP_amdgcn s)
|
||||
454
cmake/Modules/FindComputeCpp.cmake
Normal file
454
cmake/Modules/FindComputeCpp.cmake
Normal file
@ -0,0 +1,454 @@
|
||||
#.rst:
|
||||
# FindComputeCpp
|
||||
#---------------
|
||||
#
|
||||
# Copyright 2016-2018 Codeplay Software Ltd.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use these files except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#########################
|
||||
# FindComputeCpp.cmake
|
||||
#########################
|
||||
#
|
||||
# Tools for finding and building with ComputeCpp.
|
||||
#
|
||||
# User must define ComputeCpp_DIR pointing to the ComputeCpp
|
||||
# installation.
|
||||
#
|
||||
# Latest version of this file can be found at:
|
||||
# https://github.com/codeplaysoftware/computecpp-sdk
|
||||
|
||||
cmake_minimum_required(VERSION 3.4.3)
|
||||
include(FindPackageHandleStandardArgs)
|
||||
include(ComputeCppIRMap)
|
||||
|
||||
set(COMPUTECPP_USER_FLAGS "" CACHE STRING "User flags for compute++")
|
||||
separate_arguments(COMPUTECPP_USER_FLAGS)
|
||||
mark_as_advanced(COMPUTECPP_USER_FLAGS)
|
||||
|
||||
set(COMPUTECPP_BITCODE "spir64" CACHE STRING
|
||||
"Bitcode type to use as SYCL target in compute++")
|
||||
mark_as_advanced(COMPUTECPP_BITCODE)
|
||||
|
||||
find_package(OpenCL REQUIRED)
|
||||
|
||||
# Find ComputeCpp package
|
||||
|
||||
if(DEFINED ComputeCpp_DIR)
|
||||
set(computecpp_find_hint ${ComputeCpp_DIR})
|
||||
elseif(DEFINED ENV{COMPUTECPP_DIR})
|
||||
set(computecpp_find_hint $ENV{COMPUTECPP_DIR})
|
||||
endif()
|
||||
|
||||
# Used for running executables on the host
|
||||
set(computecpp_host_find_hint ${computecpp_find_hint})
|
||||
|
||||
if(CMAKE_CROSSCOMPILING)
|
||||
# ComputeCpp_HOST_DIR is used to find executables that are run on the host
|
||||
if(DEFINED ComputeCpp_HOST_DIR)
|
||||
set(computecpp_host_find_hint ${ComputeCpp_HOST_DIR})
|
||||
elseif(DEFINED ENV{COMPUTECPP_HOST_DIR})
|
||||
set(computecpp_host_find_hint $ENV{COMPUTECPP_HOST_DIR})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
find_program(ComputeCpp_DEVICE_COMPILER_EXECUTABLE compute++
|
||||
HINTS ${computecpp_host_find_hint}
|
||||
PATH_SUFFIXES bin
|
||||
NO_SYSTEM_ENVIRONMENT_PATH)
|
||||
|
||||
find_program(ComputeCpp_INFO_EXECUTABLE computecpp_info
|
||||
HINTS ${computecpp_host_find_hint}
|
||||
PATH_SUFFIXES bin
|
||||
NO_SYSTEM_ENVIRONMENT_PATH)
|
||||
|
||||
find_library(COMPUTECPP_RUNTIME_LIBRARY
|
||||
NAMES ComputeCpp ComputeCpp_vs2015
|
||||
HINTS ${computecpp_find_hint}
|
||||
PATH_SUFFIXES lib
|
||||
DOC "ComputeCpp Runtime Library")
|
||||
|
||||
find_library(COMPUTECPP_RUNTIME_LIBRARY_DEBUG
|
||||
NAMES ComputeCpp_d ComputeCpp ComputeCpp_vs2015_d
|
||||
HINTS ${computecpp_find_hint}
|
||||
PATH_SUFFIXES lib
|
||||
DOC "ComputeCpp Debug Runtime Library")
|
||||
|
||||
find_path(ComputeCpp_INCLUDE_DIRS
|
||||
NAMES "CL/sycl.hpp"
|
||||
HINTS ${computecpp_find_hint}/include
|
||||
DOC "The ComputeCpp include directory")
|
||||
get_filename_component(ComputeCpp_INCLUDE_DIRS ${ComputeCpp_INCLUDE_DIRS} ABSOLUTE)
|
||||
|
||||
get_filename_component(computecpp_canonical_root_dir "${ComputeCpp_INCLUDE_DIRS}/.." ABSOLUTE)
|
||||
set(ComputeCpp_ROOT_DIR "${computecpp_canonical_root_dir}" CACHE PATH
|
||||
"The root of the ComputeCpp install")
|
||||
|
||||
if(NOT ComputeCpp_INFO_EXECUTABLE)
|
||||
message(WARNING "Can't find computecpp_info - check ComputeCpp_DIR")
|
||||
else()
|
||||
execute_process(COMMAND ${ComputeCpp_INFO_EXECUTABLE} "--dump-version"
|
||||
OUTPUT_VARIABLE ComputeCpp_VERSION
|
||||
RESULT_VARIABLE ComputeCpp_INFO_EXECUTABLE_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
if(NOT ComputeCpp_INFO_EXECUTABLE_RESULT EQUAL "0")
|
||||
message(WARNING "Package version - Error obtaining version!")
|
||||
endif()
|
||||
|
||||
execute_process(COMMAND ${ComputeCpp_INFO_EXECUTABLE} "--dump-is-supported"
|
||||
OUTPUT_VARIABLE COMPUTECPP_PLATFORM_IS_SUPPORTED
|
||||
RESULT_VARIABLE ComputeCpp_INFO_EXECUTABLE_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
if(NOT ComputeCpp_INFO_EXECUTABLE_RESULT EQUAL "0")
|
||||
message(WARNING "platform - Error checking platform support!")
|
||||
else()
|
||||
mark_as_advanced(COMPUTECPP_PLATFORM_IS_SUPPORTED)
|
||||
if (COMPUTECPP_PLATFORM_IS_SUPPORTED)
|
||||
message(STATUS "platform - your system can support ComputeCpp")
|
||||
else()
|
||||
message(STATUS "platform - your system is not officially supported")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
find_package_handle_standard_args(ComputeCpp
|
||||
REQUIRED_VARS ComputeCpp_ROOT_DIR
|
||||
ComputeCpp_DEVICE_COMPILER_EXECUTABLE
|
||||
ComputeCpp_INFO_EXECUTABLE
|
||||
COMPUTECPP_RUNTIME_LIBRARY
|
||||
COMPUTECPP_RUNTIME_LIBRARY_DEBUG
|
||||
ComputeCpp_INCLUDE_DIRS
|
||||
VERSION_VAR ComputeCpp_VERSION)
|
||||
mark_as_advanced(ComputeCpp_ROOT_DIR
|
||||
ComputeCpp_DEVICE_COMPILER_EXECUTABLE
|
||||
ComputeCpp_INFO_EXECUTABLE
|
||||
COMPUTECPP_RUNTIME_LIBRARY
|
||||
COMPUTECPP_RUNTIME_LIBRARY_DEBUG
|
||||
ComputeCpp_INCLUDE_DIRS
|
||||
ComputeCpp_VERSION)
|
||||
|
||||
if(NOT ComputeCpp_FOUND)
|
||||
return()
|
||||
endif()
|
||||
|
||||
list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -O2 -mllvm -inline-threshold=1000 -intelspirmetadata)
|
||||
mark_as_advanced(COMPUTECPP_DEVICE_COMPILER_FLAGS)
|
||||
|
||||
if(CMAKE_CROSSCOMPILING)
|
||||
if(NOT COMPUTECPP_DONT_USE_TOOLCHAIN)
|
||||
list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS --gcc-toolchain=${COMPUTECPP_TOOLCHAIN_DIR})
|
||||
endif()
|
||||
list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS --sysroot=${COMPUTECPP_SYSROOT_DIR})
|
||||
list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -target ${COMPUTECPP_TARGET_TRIPLE})
|
||||
endif()
|
||||
|
||||
list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -sycl-target ${COMPUTECPP_BITCODE})
|
||||
message(STATUS "compute++ flags - ${COMPUTECPP_DEVICE_COMPILER_FLAGS}")
|
||||
|
||||
include(ComputeCppCompilerChecks)
|
||||
|
||||
if(NOT TARGET OpenCL::OpenCL)
|
||||
add_library(OpenCL::OpenCL UNKNOWN IMPORTED)
|
||||
set_target_properties(OpenCL::OpenCL PROPERTIES
|
||||
IMPORTED_LOCATION "${OpenCL_LIBRARIES}"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${OpenCL_INCLUDE_DIRS}"
|
||||
)
|
||||
endif()
|
||||
|
||||
if(NOT TARGET ComputeCpp::ComputeCpp)
|
||||
add_library(ComputeCpp::ComputeCpp UNKNOWN IMPORTED)
|
||||
set_target_properties(ComputeCpp::ComputeCpp PROPERTIES
|
||||
IMPORTED_LOCATION_DEBUG "${COMPUTECPP_RUNTIME_LIBRARY_DEBUG}"
|
||||
IMPORTED_LOCATION_RELWITHDEBINFO "${COMPUTECPP_RUNTIME_LIBRARY}"
|
||||
IMPORTED_LOCATION "${COMPUTECPP_RUNTIME_LIBRARY}"
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${ComputeCpp_INCLUDE_DIRS}"
|
||||
INTERFACE_LINK_LIBRARIES "OpenCL::OpenCL"
|
||||
)
|
||||
endif()
|
||||
|
||||
# This property allows targets to specify that their sources should be
|
||||
# compiled with the integration header included after the user's
|
||||
# sources, not before (e.g. when an enum is used in a kernel name, this
|
||||
# is not technically valid SYCL code but can work with ComputeCpp)
|
||||
define_property(
|
||||
TARGET PROPERTY COMPUTECPP_INCLUDE_AFTER
|
||||
BRIEF_DOCS "Include integration header after user source"
|
||||
FULL_DOCS "Changes compiler arguments such that the source file is
|
||||
actually the integration header, and the .cpp file is included on
|
||||
the command line so that it is seen by the compiler first. Enables
|
||||
non-standards-conformant SYCL code to compile with ComputeCpp."
|
||||
)
|
||||
define_property(
|
||||
TARGET PROPERTY INTERFACE_COMPUTECPP_FLAGS
|
||||
BRIEF_DOCS "Interface compile flags to provide compute++"
|
||||
FULL_DOCS "Set additional compile flags to pass to compute++ when compiling
|
||||
any target which links to this one."
|
||||
)
|
||||
define_property(
|
||||
SOURCE PROPERTY COMPUTECPP_SOURCE_FLAGS
|
||||
BRIEF_DOCS "Source file compile flags for compute++"
|
||||
FULL_DOCS "Set additional compile flags for compiling the SYCL integration
|
||||
header for the given source file."
|
||||
)
|
||||
|
||||
####################
|
||||
# __build_ir
|
||||
####################
|
||||
#
|
||||
# Adds a custom target for running compute++ and adding a dependency for the
|
||||
# resulting integration header and kernel binary.
|
||||
#
|
||||
# TARGET : Name of the target.
|
||||
# SOURCE : Source file to be compiled.
|
||||
# COUNTER : Counter included in name of custom target. Different counter
|
||||
# values prevent duplicated names of custom target when source files with
|
||||
# the same name, but located in different directories, are used for the
|
||||
# same target.
|
||||
#
|
||||
function(__build_ir)
|
||||
set(options)
|
||||
set(one_value_args
|
||||
TARGET
|
||||
SOURCE
|
||||
COUNTER
|
||||
)
|
||||
set(multi_value_args)
|
||||
cmake_parse_arguments(SDK_BUILD_IR
|
||||
"${options}"
|
||||
"${one_value_args}"
|
||||
"${multi_value_args}"
|
||||
${ARGN}
|
||||
)
|
||||
get_filename_component(sourceFileName ${SDK_BUILD_IR_SOURCE} NAME)
|
||||
|
||||
# Set the path to the integration header.
|
||||
# The .sycl filename must depend on the target so that different targets
|
||||
# using the same source file will be generated with a different rule.
|
||||
set(baseSyclName ${CMAKE_CURRENT_BINARY_DIR}/${SDK_BUILD_IR_TARGET}_${sourceFileName})
|
||||
set(outputSyclFile ${baseSyclName}.sycl)
|
||||
set(outputDeviceFile ${baseSyclName}.${IR_MAP_${COMPUTECPP_BITCODE}})
|
||||
set(depFileName ${baseSyclName}.sycl.d)
|
||||
|
||||
set(include_directories "$<TARGET_PROPERTY:${SDK_BUILD_IR_TARGET},INCLUDE_DIRECTORIES>")
|
||||
set(compile_definitions "$<TARGET_PROPERTY:${SDK_BUILD_IR_TARGET},COMPILE_DEFINITIONS>")
|
||||
set(generated_include_directories
|
||||
$<$<BOOL:${include_directories}>:-I\"$<JOIN:${include_directories},\"\t-I\">\">)
|
||||
set(generated_compile_definitions
|
||||
$<$<BOOL:${compile_definitions}>:-D$<JOIN:${compile_definitions},\t-D>>)
|
||||
|
||||
# Obtain language standard of the file
|
||||
set(device_compiler_cxx_standard)
|
||||
get_target_property(targetCxxStandard ${SDK_BUILD_IR_TARGET} CXX_STANDARD)
|
||||
if (targetCxxStandard MATCHES 17)
|
||||
set(device_compiler_cxx_standard "-std=c++1z")
|
||||
elseif (targetCxxStandard MATCHES 14)
|
||||
set(device_compiler_cxx_standard "-std=c++14")
|
||||
elseif (targetCxxStandard MATCHES 11)
|
||||
set(device_compiler_cxx_standard "-std=c++11")
|
||||
elseif (targetCxxStandard MATCHES 98)
|
||||
message(FATAL_ERROR "SYCL applications cannot be compiled using C++98")
|
||||
else ()
|
||||
set(device_compiler_cxx_standard "")
|
||||
endif()
|
||||
|
||||
get_property(source_compile_flags
|
||||
SOURCE ${SDK_BUILD_IR_SOURCE}
|
||||
PROPERTY COMPUTECPP_SOURCE_FLAGS
|
||||
)
|
||||
separate_arguments(source_compile_flags)
|
||||
if(source_compile_flags)
|
||||
list(APPEND computecpp_source_flags ${source_compile_flags})
|
||||
endif()
|
||||
|
||||
list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS
|
||||
${device_compiler_cxx_standard}
|
||||
${COMPUTECPP_USER_FLAGS}
|
||||
${computecpp_source_flags}
|
||||
)
|
||||
|
||||
set(ir_dependencies ${SDK_BUILD_IR_SOURCE})
|
||||
get_target_property(target_libraries ${SDK_BUILD_IR_TARGET} LINK_LIBRARIES)
|
||||
if(target_libraries)
|
||||
foreach(library ${target_libraries})
|
||||
if(TARGET ${library})
|
||||
list(APPEND ir_dependencies ${library})
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
# Depfile support was only added in CMake 3.7
|
||||
# CMake throws an error if it is unsupported by the generator (i. e. not ninja)
|
||||
if((NOT CMAKE_VERSION VERSION_LESS 3.7.0) AND
|
||||
CMAKE_GENERATOR MATCHES "Ninja")
|
||||
file(RELATIVE_PATH relOutputFile ${CMAKE_BINARY_DIR} ${outputDeviceFile})
|
||||
set(generate_depfile -MMD -MF ${depFileName} -MT ${relOutputFile})
|
||||
set(enable_depfile DEPFILE ${depFileName})
|
||||
endif()
|
||||
|
||||
# Add custom command for running compute++
|
||||
add_custom_command(
|
||||
OUTPUT ${outputDeviceFile} ${outputSyclFile}
|
||||
COMMAND ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE}
|
||||
${COMPUTECPP_DEVICE_COMPILER_FLAGS}
|
||||
${generated_include_directories}
|
||||
${generated_compile_definitions}
|
||||
-sycl-ih ${outputSyclFile}
|
||||
-o ${outputDeviceFile}
|
||||
-c ${SDK_BUILD_IR_SOURCE}
|
||||
${generate_depfile}
|
||||
DEPENDS ${ir_dependencies}
|
||||
IMPLICIT_DEPENDS CXX ${SDK_BUILD_IR_SOURCE}
|
||||
${enable_depfile}
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
|
||||
COMMENT "Building ComputeCpp integration header file ${outputSyclFile}")
|
||||
|
||||
# Name: (user-defined name)_(source file)_(counter)_ih
|
||||
set(headerTargetName
|
||||
${SDK_BUILD_IR_TARGET}_${sourceFileName}_${SDK_BUILD_IR_COUNTER}_ih)
|
||||
|
||||
if(NOT MSVC)
|
||||
# Add a custom target for the generated integration header
|
||||
add_custom_target(${headerTargetName} DEPENDS ${outputDeviceFile} ${outputSyclFile})
|
||||
add_dependencies(${SDK_BUILD_IR_TARGET} ${headerTargetName})
|
||||
endif()
|
||||
|
||||
# This property can be set on a per-target basis to indicate that the
|
||||
# integration header should appear after the main source listing
|
||||
get_target_property(includeAfter ${SDK_ADD_SYCL_TARGET} COMPUTECPP_INCLUDE_AFTER)
|
||||
|
||||
if(includeAfter)
|
||||
# Change the source file to the integration header - e.g.
|
||||
# g++ -c source_file_name.cpp.sycl
|
||||
get_target_property(current_sources ${SDK_BUILD_IR_TARGET} SOURCES)
|
||||
# Remove absolute path to source file
|
||||
list(REMOVE_ITEM current_sources ${SDK_BUILD_IR_SOURCE})
|
||||
# Remove relative path to source file
|
||||
string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" ""
|
||||
rel_source_file ${SDK_BUILD_IR_SOURCE}
|
||||
)
|
||||
list(REMOVE_ITEM current_sources ${rel_source_file})
|
||||
# Add SYCL header to source list
|
||||
list(APPEND current_sources ${outputSyclFile})
|
||||
set_property(TARGET ${SDK_BUILD_IR_TARGET}
|
||||
PROPERTY SOURCES ${current_sources})
|
||||
# CMake/gcc don't know what language a .sycl file is, so tell them
|
||||
set_property(SOURCE ${outputSyclFile} PROPERTY LANGUAGE CXX)
|
||||
set(includedFile ${SDK_BUILD_IR_SOURCE})
|
||||
set(cppFile ${outputSyclFile})
|
||||
else()
|
||||
set_property(SOURCE ${outputSyclFile} PROPERTY HEADER_FILE_ONLY ON)
|
||||
set(includedFile ${outputSyclFile})
|
||||
set(cppFile ${SDK_BUILD_IR_SOURCE})
|
||||
endif()
|
||||
|
||||
# Force inclusion of the integration header for the host compiler
|
||||
if(MSVC)
|
||||
# Group SYCL files inside Visual Studio
|
||||
source_group("SYCL" FILES ${outputSyclFile})
|
||||
|
||||
if(includeAfter)
|
||||
# Allow the source file to be edited using Visual Studio.
|
||||
# It will be added as a header file so it won't be compiled.
|
||||
set_property(SOURCE ${SDK_BUILD_IR_SOURCE} PROPERTY HEADER_FILE_ONLY true)
|
||||
endif()
|
||||
|
||||
# Add both source and the sycl files to the VS solution.
|
||||
target_sources(${SDK_BUILD_IR_TARGET} PUBLIC ${SDK_BUILD_IR_SOURCE} ${outputSyclFile})
|
||||
|
||||
set(forceIncludeFlags "/FI${includedFile} /TP")
|
||||
else()
|
||||
set(forceIncludeFlags "-include ${includedFile} -x c++")
|
||||
endif()
|
||||
|
||||
set_property(
|
||||
SOURCE ${cppFile}
|
||||
APPEND_STRING PROPERTY COMPILE_FLAGS "${forceIncludeFlags}"
|
||||
)
|
||||
|
||||
endfunction(__build_ir)
|
||||
|
||||
#######################
|
||||
# add_sycl_to_target
|
||||
#######################
|
||||
#
|
||||
# Adds a SYCL compilation custom command associated with an existing
|
||||
# target and sets a dependancy on that new command.
|
||||
#
|
||||
# TARGET : Name of the target to add SYCL to.
|
||||
# SOURCES : Source files to be compiled for SYCL.
|
||||
#
|
||||
function(add_sycl_to_target)
|
||||
set(options)
|
||||
set(one_value_args
|
||||
TARGET
|
||||
)
|
||||
set(multi_value_args
|
||||
SOURCES
|
||||
)
|
||||
cmake_parse_arguments(SDK_ADD_SYCL
|
||||
"${options}"
|
||||
"${one_value_args}"
|
||||
"${multi_value_args}"
|
||||
${ARGN}
|
||||
)
|
||||
|
||||
set_target_properties(${SDK_ADD_SYCL_TARGET} PROPERTIES LINKER_LANGUAGE CXX)
|
||||
|
||||
# If the CXX compiler is set to compute++ enable the driver.
|
||||
get_filename_component(cmakeCxxCompilerFileName "${CMAKE_CXX_COMPILER}" NAME)
|
||||
if("${cmakeCxxCompilerFileName}" STREQUAL "compute++")
|
||||
if(MSVC)
|
||||
message(FATAL_ERROR "The compiler driver is not supported by this system,
|
||||
revert the CXX compiler to your default host compiler.")
|
||||
endif()
|
||||
|
||||
get_target_property(includeAfter ${SDK_ADD_SYCL_TARGET} COMPUTECPP_INCLUDE_AFTER)
|
||||
if(includeAfter)
|
||||
list(APPEND COMPUTECPP_USER_FLAGS -fsycl-ih-last)
|
||||
endif()
|
||||
list(INSERT COMPUTECPP_DEVICE_COMPILER_FLAGS 0 -sycl-driver)
|
||||
# Prepend COMPUTECPP_DEVICE_COMPILER_FLAGS and append COMPUTECPP_USER_FLAGS
|
||||
foreach(prop COMPILE_OPTIONS INTERFACE_COMPILE_OPTIONS)
|
||||
get_target_property(target_compile_options ${SDK_ADD_SYCL_TARGET} ${prop})
|
||||
if(NOT target_compile_options)
|
||||
set(target_compile_options "")
|
||||
endif()
|
||||
set_property(
|
||||
TARGET ${SDK_ADD_SYCL_TARGET}
|
||||
PROPERTY ${prop}
|
||||
${COMPUTECPP_DEVICE_COMPILER_FLAGS}
|
||||
${target_compile_options}
|
||||
${COMPUTECPP_USER_FLAGS}
|
||||
)
|
||||
endforeach()
|
||||
else()
|
||||
set(fileCounter 0)
|
||||
list(INSERT COMPUTECPP_DEVICE_COMPILER_FLAGS 0 -sycl)
|
||||
# Add custom target to run compute++ and generate the integration header
|
||||
foreach(sourceFile ${SDK_ADD_SYCL_SOURCES})
|
||||
if(NOT IS_ABSOLUTE ${sourceFile})
|
||||
set(sourceFile "${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile}")
|
||||
endif()
|
||||
__build_ir(
|
||||
TARGET ${SDK_ADD_SYCL_TARGET}
|
||||
SOURCE ${sourceFile}
|
||||
COUNTER ${fileCounter}
|
||||
)
|
||||
MATH(EXPR fileCounter "${fileCounter} + 1")
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
set_property(TARGET ${SDK_ADD_SYCL_TARGET}
|
||||
APPEND PROPERTY LINK_LIBRARIES ComputeCpp::ComputeCpp)
|
||||
set_property(TARGET ${SDK_ADD_SYCL_TARGET}
|
||||
APPEND PROPERTY INTERFACE_LINK_LIBRARIES ComputeCpp::ComputeCpp)
|
||||
endfunction(add_sycl_to_target)
|
||||
34
cmake/toolchains/arm-gcc-poky.cmake
Normal file
34
cmake/toolchains/arm-gcc-poky.cmake
Normal file
@ -0,0 +1,34 @@
|
||||
set(CMAKE_SYSTEM_NAME Linux)
|
||||
set(CMAKE_SYSTEM_PROCESSOR ARM64)
|
||||
set(SDK_POKY_ROOT $ENV{SDK_POKY_ROOT})
|
||||
|
||||
if(NOT SDK_POKY_ROOT)
|
||||
message(FATAL_ERROR
|
||||
"Please set SDK_POKY_ROOT in the environment when crosscompiling.")
|
||||
endif()
|
||||
|
||||
set(COMPUTECPP_TARGET_TRIPLE aarch64-poky-linux)
|
||||
set(COMPUTECPP_TOOLCHAIN_DIR ${SDK_POKY_ROOT}/x86_64-pokysdk-linux)
|
||||
set(COMPUTECPP_SYSROOT_DIR ${SDK_POKY_ROOT}/aarch64-poky-linux)
|
||||
# Adding this as the GCC toolchain makes compute++ not find headers
|
||||
set(COMPUTECPP_DONT_USE_TOOLCHAIN ON)
|
||||
|
||||
set(CMAKE_C_COMPILER "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-gcc" CACHE PATH "gcc")
|
||||
set(CMAKE_CXX_COMPILER "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-g++" CACHE PATH "g++")
|
||||
set(CMAKE_AR "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-ar" CACHE PATH "archive")
|
||||
set(CMAKE_LINKER "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-ld" CACHE PATH "linker")
|
||||
set(CMAKE_NM "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-nm" CACHE PATH "nm")
|
||||
set(CMAKE_OBJCOPY "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-objcopy" CACHE PATH "objcopy")
|
||||
set(CMAKE_OBJDUMP "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-objdump" CACHE PATH "objdump")
|
||||
set(CMAKE_STRIP "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-strip" CACHE PATH "strip")
|
||||
set(CMAKE_RANLIB "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-ranlib" CACHE PATH "ranlib")
|
||||
|
||||
set(CMAKE_FIND_ROOT_PATH ${COMPUTECPP_SYSROOT_DIR})
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
|
||||
|
||||
set(CMAKE_SYSROOT "${COMPUTECPP_SYSROOT_DIR}")
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__aarch64__ --sysroot=${COMPUTECPP_SYSROOT_DIR}" CACHE INTERNAL "")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__aarch64__ --sysroot=${COMPUTECPP_SYSROOT_DIR}" CACHE INTERNAL "")
|
||||
|
||||
set(CMAKE_CXX_LINK_EXECUTABLE "<CMAKE_CXX_COMPILER> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>" CACHE INTERNAL "")
|
||||
18
cmake/toolchains/gcc-generic.cmake
Normal file
18
cmake/toolchains/gcc-generic.cmake
Normal file
@ -0,0 +1,18 @@
|
||||
set(CMAKE_SYSTEM_NAME Linux)
|
||||
set(COMPUTECPP_SYSROOT_DIR $ENV{COMPUTECPP_SYSROOT_DIR})
|
||||
set(COMPUTECPP_TOOLCHAIN_DIR $ENV{COMPUTECPP_TOOLCHAIN_DIR})
|
||||
set(COMPUTECPP_TARGET_TRIPLE $ENV{COMPUTECPP_TARGET_TRIPLE})
|
||||
|
||||
if(NOT COMPUTECPP_SYSROOT_DIR OR
|
||||
NOT COMPUTECPP_TOOLCHAIN_DIR OR
|
||||
NOT COMPUTECPP_TARGET_TRIPLE
|
||||
)
|
||||
message(FATAL_ERROR
|
||||
"Please set all of COMPUTECPP_TARGET_TRIPLE, COMPUTECPP_SYSROOT_DIR and "
|
||||
"COMPUTECPP_TOOLCHAIN_DIR in the environment when crosscompiling.")
|
||||
endif()
|
||||
|
||||
set(CMAKE_SYSROOT ${COMPUTECPP_SYSROOT_DIR})
|
||||
set(CMAKE_C_COMPILER ${COMPUTECPP_TOOLCHAIN_DIR}/bin/${COMPUTECPP_TARGET_TRIPLE}-gcc)
|
||||
set(CMAKE_CXX_COMPILER ${COMPUTECPP_TOOLCHAIN_DIR}/bin/${COMPUTECPP_TARGET_TRIPLE}-g++)
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER)
|
||||
@ -13,7 +13,7 @@ CXXFLAGS+=-DNTILES=$(TBSIZE)
|
||||
endif
|
||||
|
||||
|
||||
hc-stream: main.cpp HCStream.cpp
|
||||
hc-stream: ../main.cpp HCStream.cpp
|
||||
$(HCC) $(CXXFLAGS) -DHC $^ $(LDFLAGS) $(EXTRA_FLAGS) -o $@
|
||||
|
||||
.PHONY: clean
|
||||
146
register_models.cmake
Normal file
146
register_models.cmake
Normal file
@ -0,0 +1,146 @@
|
||||
|
||||
#function(switch_expr INPUT OUTPUT)
|
||||
# list(LENGTH ARGN N)
|
||||
# math(EXPR EVEN "${N} % 2")
|
||||
# if (NOT EVEN EQUAL 0)
|
||||
# message(FATAL_ERROR "Expr must be a list of string pairs, ${EVEN}")
|
||||
# endif ()
|
||||
# math(EXPR N_ "${N}-1")
|
||||
# foreach (idx RANGE 0 ${N_} 2)
|
||||
# math(EXPR KEY_IDX "${idx} + 0")
|
||||
# math(EXPR VALUE_IDX "${idx} + 1")
|
||||
# list(GET ARGN ${KEY_IDX} KEY)
|
||||
# list(GET ARGN ${VALUE_IDX} VALUE)
|
||||
# if (${KEY} STREQUAL ${INPUT})
|
||||
# set(${OUTPUT} ${VALUE} PARENT_SCOPE)
|
||||
# break()
|
||||
# endif ()
|
||||
# endforeach ()
|
||||
#endfunction()
|
||||
#
|
||||
|
||||
macro(wipe_gcc_style_optimisation_flags VAR)
|
||||
string(REGEX REPLACE "([\\/\\-]O.)" "" ${VAR} ${${VAR}})
|
||||
endmacro()
|
||||
|
||||
macro(register_link_library)
|
||||
list(APPEND LINK_LIBRARIES ${ARGN})
|
||||
endmacro()
|
||||
|
||||
macro(register_append_cxx_flags CONFIG)
|
||||
if ("${CONFIG}" STREQUAL "RELEASE" OR "${CONFIG}" STREQUAL "ANY")
|
||||
list(APPEND DEFAULT_RELEASE_FLAGS ${ARGN})
|
||||
elseif ("${CONFIG}" STREQUAL "DEBUG" OR "${CONFIG}" STREQUAL "ANY")
|
||||
list(APPEND DEFAULT_DEBUG_FLAGS ${ARGN})
|
||||
else ()
|
||||
message(FATAL_ERROR "register_flags supports only RELEASE, DEBUG, or ANY for all configs, got `${CONFIG}`")
|
||||
endif ()
|
||||
endmacro()
|
||||
|
||||
macro(register_append_link_flags)
|
||||
list(APPEND LINK_FLAGS ${ARGN})
|
||||
endmacro()
|
||||
|
||||
macro(register_append_compiler_and_arch_specific_cxx_flags PREFIX CXX ARCH)
|
||||
string(TOUPPER ${CXX} _CXX)
|
||||
string(TOUPPER ${ARCH} _ARCH)
|
||||
set(_CXX_ARCH_SPECIFIC_FLAGS "${${PREFIX}_${_CXX}_${_ARCH}}")
|
||||
if (_CXX_ARCH_SPECIFIC_FLAGS)
|
||||
register_append_cxx_flags(ANY ${_CXX_ARCH_SPECIFIC_FLAGS})
|
||||
endif ()
|
||||
set(_CXX_ARCH_SPECIFIC_FLAGS "${${PREFIX}_${_CXX}}")
|
||||
if (_CXX_ARCH_SPECIFIC_FLAGS)
|
||||
register_append_cxx_flags(ANY ${_CXX_ARCH_SPECIFIC_FLAGS})
|
||||
endif ()
|
||||
endmacro()
|
||||
|
||||
macro(register_definitions)
|
||||
list(APPEND IMPL_DEFINITIONS ${ARGN})
|
||||
endmacro()
|
||||
|
||||
macro(register_flag_required NAME DESCRIPTION)
|
||||
list(APPEND CUSTOM_FLAGS_TRIPLE "${NAME}" "${DESCRIPTION}" ON "")
|
||||
endmacro()
|
||||
|
||||
macro(register_flag_optional NAME DESCRIPTION DEFAULT)
|
||||
list(APPEND CUSTOM_FLAGS_TRIPLE "${NAME}" "${DESCRIPTION}" OFF "${DEFAULT}")
|
||||
endmacro()
|
||||
|
||||
function(registered_flags_action ACTION OUT)
|
||||
list(LENGTH CUSTOM_FLAGS_TRIPLE NFLAGS)
|
||||
if (NOT NFLAGS EQUAL "0")
|
||||
|
||||
if (${ACTION} STREQUAL "print")
|
||||
set(LINE "Supported flags:\n\n")
|
||||
elseif (${ACTION} STREQUAL "check")
|
||||
set(LINE "Model-specific flags for this build:\n\n")
|
||||
endif ()
|
||||
|
||||
|
||||
math(EXPR NFLAGS "${NFLAGS}-1")
|
||||
foreach (idx RANGE 0 ${NFLAGS} 4)
|
||||
math(EXPR NAME_IDX "${idx} + 0")
|
||||
math(EXPR DESCRIPTION_IDX "${idx} + 1")
|
||||
math(EXPR REQUIRED_IDX "${idx} + 2")
|
||||
math(EXPR DEFAULT_VALUE_IDX "${idx} + 3")
|
||||
list(GET CUSTOM_FLAGS_TRIPLE ${NAME_IDX} NAME)
|
||||
list(GET CUSTOM_FLAGS_TRIPLE ${DESCRIPTION_IDX} DESCRIPTION)
|
||||
list(GET CUSTOM_FLAGS_TRIPLE ${REQUIRED_IDX} REQUIRED)
|
||||
list(GET CUSTOM_FLAGS_TRIPLE ${DEFAULT_VALUE_IDX} DEFAULT_VALUE)
|
||||
if (${ACTION} STREQUAL "print")
|
||||
if (${REQUIRED})
|
||||
set(DEFAULT_VALUE "(required)")
|
||||
else ()
|
||||
set(DEFAULT_VALUE "(optional, default=${DEFAULT_VALUE})")
|
||||
endif ()
|
||||
set(LINE "${LINE} ${NAME} ${DEFAULT_VALUE}: ${DESCRIPTION}\n")
|
||||
elseif (${ACTION} STREQUAL "check")
|
||||
if (${REQUIRED})
|
||||
# required flag
|
||||
if (NOT DEFINED ${NAME})
|
||||
message(FATAL_ERROR "`${NAME}` is not set! (${DESCRIPTION})")
|
||||
endif ()
|
||||
else ()
|
||||
# optional flag with default
|
||||
if (NOT DEFINED ${NAME})
|
||||
set(${NAME} "${DEFAULT_VALUE}" PARENT_SCOPE) # setting PARENT_SCOPE does not affect local scope
|
||||
set(${NAME} "${DEFAULT_VALUE}")
|
||||
endif ()
|
||||
endif ()
|
||||
set(LINE "${LINE} ${NAME} = `${${NAME}}`\n")
|
||||
else ()
|
||||
message(FATAL_ERROR "action `${ACTION}` not supported")
|
||||
endif ()
|
||||
endforeach ()
|
||||
endif ()
|
||||
set(${OUT} "${LINE}" PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
|
||||
macro(register_model NAME PREPROCESSOR_NAME)
|
||||
string(TOUPPER ${NAME} MODEL_UPPER)
|
||||
list(APPEND REGISTERED_MODELS "${NAME}")
|
||||
|
||||
list(APPEND IMPL_${MODEL_UPPER}_SOURCES "${ARGN}")
|
||||
list(APPEND IMPL_${MODEL_UPPER}_DEFINITIONS "${PREPROCESSOR_NAME}")
|
||||
endmacro()
|
||||
|
||||
|
||||
macro(load_model MODEL)
|
||||
string(TOUPPER "${MODEL}" MODEL_UPPER)
|
||||
if ("${MODEL_UPPER}" IN_LIST REGISTERED_MODELS)
|
||||
set(MODEL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${MODEL_UPPER}.cmake)
|
||||
if (NOT EXISTS ${MODEL_FILE})
|
||||
message(FATAL_ERROR "${MODEL_FILE} not found, perhaps it needs to be implemented?")
|
||||
endif ()
|
||||
include(${MODEL_FILE})
|
||||
list(APPEND IMPL_SOURCES ${IMPL_${MODEL_UPPER}_SOURCES})
|
||||
list(APPEND IMPL_DEFINITIONS ${IMPL_${MODEL_UPPER}_DEFINITIONS})
|
||||
|
||||
string(TOLOWER ${MODEL} MODEL_LOWER)
|
||||
set(EXE_NAME ${MODEL_LOWER}-stream)
|
||||
|
||||
else ()
|
||||
message(FATAL_ERROR "Unsupported model: ${MODEL}")
|
||||
endif ()
|
||||
endmacro()
|
||||
Loading…
Reference in New Issue
Block a user