From 6536c1736a80934fb5f4e8267505a9bf92bcf71b Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Fri, 5 Mar 2021 13:41:35 +0000 Subject: [PATCH 01/11] Initial CMake+CI integration --- .github/workflows/main.yaml | 40 + .gitignore | 7 + ACC.cmake | 68 + ACCStream.cpp | 16 +- ACCStream.h | 8 + CL/cl.h | 1902 ++++++++++++++++++ CL/cl_d3d10.h | 117 ++ CL/cl_d3d11.h | 117 ++ CL/cl_dx9_media_sharing.h | 118 ++ CL/cl_dx9_media_sharing_intel.h | 170 ++ CL/cl_egl.h | 120 ++ CL/cl_ext.h | 841 ++++++++ CL/cl_ext_intel.h | 682 +++++++ CL/cl_gl.h | 159 ++ CL/cl_gl_ext.h | 40 + CL/cl_half.h | 440 ++++ CL/cl_icd.h | 1287 ++++++++++++ CL/cl_platform.h | 1384 +++++++++++++ CL/cl_va_api_media_sharing_intel.h | 160 ++ CL/cl_version.h | 81 + CL/opencl.h | 33 + CMakeLists.txt | 184 ++ CUDA.cmake | 30 + HIP.cmake | 7 + KOKKOS.cmake | 30 + OCL.cmake | 17 + OMP.cmake | 131 ++ RAJA.cmake | 84 + RAJAStream.cpp | 26 +- RAJAStream.hpp | 34 +- README.md | 49 +- STD.cmake | 33 + STD20.cmake | 15 + STDStream.h | 2 +- SYCL.cmake | 85 + ci-prepare-bionic.sh | 335 +++ ci-test-compile.sh | 252 +++ cmake/Modules/ComputeCppCompilerChecks.cmake | 65 + cmake/Modules/ComputeCppIRMap.cmake | 18 + cmake/Modules/FindComputeCpp.cmake | 454 +++++ cmake/toolchains/arm-gcc-poky.cmake | 34 + cmake/toolchains/gcc-generic.cmake | 18 + HC.make => legacy/HC.make | 2 +- HCStream.cpp => legacy/HCStream.cpp | 0 HCStream.h => legacy/HCStream.h | 0 register_models.cmake | 136 ++ 46 files changed, 9799 insertions(+), 32 deletions(-) create mode 100644 .github/workflows/main.yaml create mode 100644 ACC.cmake create mode 100644 CL/cl.h create mode 100644 CL/cl_d3d10.h create mode 100644 CL/cl_d3d11.h create mode 100644 CL/cl_dx9_media_sharing.h create mode 100644 CL/cl_dx9_media_sharing_intel.h create mode 100644 CL/cl_egl.h create mode 100644 CL/cl_ext.h create mode 100644 CL/cl_ext_intel.h create mode 100644 CL/cl_gl.h create mode 100644 CL/cl_gl_ext.h create mode 100644 CL/cl_half.h create mode 100644 CL/cl_icd.h create mode 100644 CL/cl_platform.h create mode 100644 CL/cl_va_api_media_sharing_intel.h create mode 100644 CL/cl_version.h create mode 100644 CL/opencl.h create mode 100644 CMakeLists.txt create mode 100644 CUDA.cmake create mode 100644 HIP.cmake create mode 100644 KOKKOS.cmake create mode 100644 OCL.cmake create mode 100644 OMP.cmake create mode 100644 RAJA.cmake create mode 100644 STD.cmake create mode 100644 STD20.cmake create mode 100644 SYCL.cmake create mode 100755 ci-prepare-bionic.sh create mode 100755 ci-test-compile.sh create mode 100644 cmake/Modules/ComputeCppCompilerChecks.cmake create mode 100644 cmake/Modules/ComputeCppIRMap.cmake create mode 100644 cmake/Modules/FindComputeCpp.cmake create mode 100644 cmake/toolchains/arm-gcc-poky.cmake create mode 100644 cmake/toolchains/gcc-generic.cmake rename HC.make => legacy/HC.make (89%) rename HCStream.cpp => legacy/HCStream.cpp (100%) rename HCStream.h => legacy/HCStream.h (100%) create mode 100644 register_models.cmake diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml new file mode 100644 index 0000000..2df71cb --- /dev/null +++ b/.github/workflows/main.yaml @@ -0,0 +1,40 @@ +name: CI +on: + push: + pull_request: + +jobs: + test: + runs-on: ubuntu-18.04 + steps: + - uses: actions/checkout@v2 + + - name: Cache compiler + id: prepare-compilers + uses: actions/cache@v2 + with: + path: compilers + key: ${{ runner.os }}-${{ hashFiles('ci-prepare-bionic.sh') }} + + - name: Prepare compilers + if: steps.prepare-compilers.outputs.cache-hit != 'true' + run: source ./ci-prepare-bionic.sh ./compilers SETUP true || true + + - name: Setup test environment + run: source ./ci-prepare-bionic.sh ./compilers VARS false || true + - name: Test compile gcc + run: ./ci-test-compile.sh ./build gcc all + - name: Test compile clang + run: ./ci-test-compile.sh ./build clang all + - name: Test compile nvhpc + run: ./ci-test-compile.sh ./build nvhpc all + - name: Test compile aocc + run: ./ci-test-compile.sh ./build aocc all + - name: Test compile aomp + run: ./ci-test-compile.sh ./build aomp all + - name: Test compile hip + run: ./ci-test-compile.sh ./build hip all + - name: Test compile dpcpp + run: ./ci-test-compile.sh ./build dpcpp all + - name: Test compile hipsycl + run: ./ci-test-compile.sh ./build hipsycl all diff --git a/.gitignore b/.gitignore index 6a98d24..c3ea1da 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,10 @@ KokkosCore_config.* .DS_Store Makefile + +build/ +cmake-build-*/ +CMakeFiles/ +.idea/ +.vscode/ +.directory \ No newline at end of file diff --git a/ACC.cmake b/ACC.cmake new file mode 100644 index 0000000..f951fae --- /dev/null +++ b/ACC.cmake @@ -0,0 +1,68 @@ + +register_flag_optional(CMAKE_CXX_COMPILER + "Any CXX compiler that supports OpenACC as per CMake detection" + "c++") + +register_flag_optional(TARGET_DEVICE + "[PGI/NVHPC only] This sets the `-target` flag, possible values are: + gpu - Globally set the target device to an NVIDIA GPU + multicore - Globally set the target device to the host CPU + Refer to `nvc++ --help` for the full list" + "") + + +register_flag_optional(CUDA_ARCH + "[PGI/NVHPC only] Only applicable if `TARGET_DEVICE` is set to `gpu`. + Nvidia architecture in ccXY format, for example, sm_70 becomes cc70, will be passed in via `-gpu=` (e.g `cc70`) + Possible values are: + cc35 - Compile for compute capability 3.5 + cc50 - Compile for compute capability 5.0 + cc60 - Compile for compute capability 6.0 + cc62 - Compile for compute capability 6.2 + cc70 - Compile for compute capability 7.0 + cc72 - Compile for compute capability 7.2 + cc75 - Compile for compute capability 7.5 + cc80 - Compile for compute capability 8.0 + ccall - Compile for all supported compute capabilities + Refer to `nvc++ --help` for the full list" + "") + +register_flag_optional(TARGET_PROCESSOR + "[PGI/NVHPC only] This sets the `-tp` (target processor) flag, possible values are: + px - Generic x86 Processor + bulldozer - AMD Bulldozer processor + piledriver - AMD Piledriver processor + zen - AMD Zen architecture (Epyc, Ryzen) + zen2 - AMD Zen 2 architecture (Ryzen 2) + sandybridge - Intel SandyBridge processor + haswell - Intel Haswell processor + knl - Intel Knights Landing processor + skylake - Intel Skylake Xeon processor + host - Link native version of HPC SDK cpu math library + native - Alias for -tp host + Refer to `nvc++ --help` for the full list" + "") + +macro(setup) + find_package(OpenACC REQUIRED) + register_link_library(OpenACC::OpenACC_CXX) + register_definitions(restrict=__restrict) + # XXX NVHPC is really new so older Cmake thinks it's PGI, which is true + if ((CMAKE_CXX_COMPILER_ID STREQUAL PGI) OR (CMAKE_CXX_COMPILER_ID STREQUAL NVHPC)) + + if (TARGET_DEVICE) + register_append_cxx_flags(ANY -target=${TARGET_DEVICE}) + endif () + + if (CUDA_ARCH) + register_append_cxx_flags(ANY -gpu=${CUDA_ARCH}) + endif () + + if (TARGET_PROCESSOR) + register_append_cxx_flags(ANY -tp=${TARGET_PROCESSOR}) + endif () + + endif () + +endmacro() + diff --git a/ACCStream.cpp b/ACCStream.cpp index 9e0e3e7..1e38c8b 100644 --- a/ACCStream.cpp +++ b/ACCStream.cpp @@ -16,9 +16,14 @@ ACCStream::ACCStream(const int ARRAY_SIZE, int device) array_size = ARRAY_SIZE; // Set up data region on device - a = new T[array_size]; - b = new T[array_size]; - c = new T[array_size]; + this->a = new T[array_size]; + this->b = new T[array_size]; + this->c = new T[array_size]; + + T * restrict a = this->a; + T * restrict b = this->b; + T * restrict c = this->c; + #pragma acc enter data create(a[0:array_size], b[0:array_size], c[0:array_size]) {} } @@ -28,6 +33,11 @@ ACCStream::~ACCStream() { // End data region on device int array_size = this->array_size; + + T * restrict a = this->a; + T * restrict b = this->b; + T * restrict c = this->c; + #pragma acc exit data delete(a[0:array_size], b[0:array_size], c[0:array_size]) {} diff --git a/ACCStream.h b/ACCStream.h index 4cb9d25..28fb833 100644 --- a/ACCStream.h +++ b/ACCStream.h @@ -19,9 +19,17 @@ template class ACCStream : public Stream { + + struct A{ + T *a; + T *b; + T *c; + }; + protected: // Size of arrays int array_size; + A aa; // Device side pointers T *a; T *b; diff --git a/CL/cl.h b/CL/cl.h new file mode 100644 index 0000000..f33f999 --- /dev/null +++ b/CL/cl.h @@ -0,0 +1,1902 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __OPENCL_CL_H +#define __OPENCL_CL_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/******************************************************************************/ + +typedef struct _cl_platform_id * cl_platform_id; +typedef struct _cl_device_id * cl_device_id; +typedef struct _cl_context * cl_context; +typedef struct _cl_command_queue * cl_command_queue; +typedef struct _cl_mem * cl_mem; +typedef struct _cl_program * cl_program; +typedef struct _cl_kernel * cl_kernel; +typedef struct _cl_event * cl_event; +typedef struct _cl_sampler * cl_sampler; + +typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ +typedef cl_ulong cl_bitfield; +typedef cl_bitfield cl_device_type; +typedef cl_uint cl_platform_info; +typedef cl_uint cl_device_info; +typedef cl_bitfield cl_device_fp_config; +typedef cl_uint cl_device_mem_cache_type; +typedef cl_uint cl_device_local_mem_type; +typedef cl_bitfield cl_device_exec_capabilities; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_device_svm_capabilities; +#endif +typedef cl_bitfield cl_command_queue_properties; +#ifdef CL_VERSION_1_2 +typedef intptr_t cl_device_partition_property; +typedef cl_bitfield cl_device_affinity_domain; +#endif + +typedef intptr_t cl_context_properties; +typedef cl_uint cl_context_info; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_queue_properties; +#endif +typedef cl_uint cl_command_queue_info; +typedef cl_uint cl_channel_order; +typedef cl_uint cl_channel_type; +typedef cl_bitfield cl_mem_flags; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_svm_mem_flags; +#endif +typedef cl_uint cl_mem_object_type; +typedef cl_uint cl_mem_info; +#ifdef CL_VERSION_1_2 +typedef cl_bitfield cl_mem_migration_flags; +#endif +typedef cl_uint cl_image_info; +#ifdef CL_VERSION_1_1 +typedef cl_uint cl_buffer_create_type; +#endif +typedef cl_uint cl_addressing_mode; +typedef cl_uint cl_filter_mode; +typedef cl_uint cl_sampler_info; +typedef cl_bitfield cl_map_flags; +#ifdef CL_VERSION_2_0 +typedef intptr_t cl_pipe_properties; +typedef cl_uint cl_pipe_info; +#endif +typedef cl_uint cl_program_info; +typedef cl_uint cl_program_build_info; +#ifdef CL_VERSION_1_2 +typedef cl_uint cl_program_binary_type; +#endif +typedef cl_int cl_build_status; +typedef cl_uint cl_kernel_info; +#ifdef CL_VERSION_1_2 +typedef cl_uint cl_kernel_arg_info; +typedef cl_uint cl_kernel_arg_address_qualifier; +typedef cl_uint cl_kernel_arg_access_qualifier; +typedef cl_bitfield cl_kernel_arg_type_qualifier; +#endif +typedef cl_uint cl_kernel_work_group_info; +#ifdef CL_VERSION_2_1 +typedef cl_uint cl_kernel_sub_group_info; +#endif +typedef cl_uint cl_event_info; +typedef cl_uint cl_command_type; +typedef cl_uint cl_profiling_info; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_sampler_properties; +typedef cl_uint cl_kernel_exec_info; +#endif +#ifdef CL_VERSION_3_0 +typedef cl_bitfield cl_device_atomic_capabilities; +typedef cl_uint cl_khronos_vendor_id; +typedef cl_bitfield cl_mem_properties; +typedef cl_uint cl_version; +#endif + +typedef struct _cl_image_format { + cl_channel_order image_channel_order; + cl_channel_type image_channel_data_type; +} cl_image_format; + +#ifdef CL_VERSION_1_2 + +typedef struct _cl_image_desc { + cl_mem_object_type image_type; + size_t image_width; + size_t image_height; + size_t image_depth; + size_t image_array_size; + size_t image_row_pitch; + size_t image_slice_pitch; + cl_uint num_mip_levels; + cl_uint num_samples; +#ifdef CL_VERSION_2_0 +#ifdef __GNUC__ + __extension__ /* Prevents warnings about anonymous union in -pedantic builds */ +#endif +#ifdef _MSC_VER +#pragma warning( push ) +#pragma warning( disable : 4201 ) /* Prevents warning about nameless struct/union in /W4 /Za builds */ +#endif + union { +#endif + cl_mem buffer; +#ifdef CL_VERSION_2_0 + cl_mem mem_object; + }; +#ifdef _MSC_VER +#pragma warning( pop ) +#endif +#endif +} cl_image_desc; + +#endif + +#ifdef CL_VERSION_1_1 + +typedef struct _cl_buffer_region { + size_t origin; + size_t size; +} cl_buffer_region; + +#endif + +#ifdef CL_VERSION_3_0 + +#define CL_NAME_VERSION_MAX_NAME_SIZE 64 + +typedef struct _cl_name_version { + cl_version version; + char name[CL_NAME_VERSION_MAX_NAME_SIZE]; +} cl_name_version; + +#endif + +/******************************************************************************/ + +/* Error Codes */ +#define CL_SUCCESS 0 +#define CL_DEVICE_NOT_FOUND -1 +#define CL_DEVICE_NOT_AVAILABLE -2 +#define CL_COMPILER_NOT_AVAILABLE -3 +#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 +#define CL_OUT_OF_RESOURCES -5 +#define CL_OUT_OF_HOST_MEMORY -6 +#define CL_PROFILING_INFO_NOT_AVAILABLE -7 +#define CL_MEM_COPY_OVERLAP -8 +#define CL_IMAGE_FORMAT_MISMATCH -9 +#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 +#define CL_BUILD_PROGRAM_FAILURE -11 +#define CL_MAP_FAILURE -12 +#ifdef CL_VERSION_1_1 +#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 +#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 +#endif +#ifdef CL_VERSION_1_2 +#define CL_COMPILE_PROGRAM_FAILURE -15 +#define CL_LINKER_NOT_AVAILABLE -16 +#define CL_LINK_PROGRAM_FAILURE -17 +#define CL_DEVICE_PARTITION_FAILED -18 +#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 +#endif + +#define CL_INVALID_VALUE -30 +#define CL_INVALID_DEVICE_TYPE -31 +#define CL_INVALID_PLATFORM -32 +#define CL_INVALID_DEVICE -33 +#define CL_INVALID_CONTEXT -34 +#define CL_INVALID_QUEUE_PROPERTIES -35 +#define CL_INVALID_COMMAND_QUEUE -36 +#define CL_INVALID_HOST_PTR -37 +#define CL_INVALID_MEM_OBJECT -38 +#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 +#define CL_INVALID_IMAGE_SIZE -40 +#define CL_INVALID_SAMPLER -41 +#define CL_INVALID_BINARY -42 +#define CL_INVALID_BUILD_OPTIONS -43 +#define CL_INVALID_PROGRAM -44 +#define CL_INVALID_PROGRAM_EXECUTABLE -45 +#define CL_INVALID_KERNEL_NAME -46 +#define CL_INVALID_KERNEL_DEFINITION -47 +#define CL_INVALID_KERNEL -48 +#define CL_INVALID_ARG_INDEX -49 +#define CL_INVALID_ARG_VALUE -50 +#define CL_INVALID_ARG_SIZE -51 +#define CL_INVALID_KERNEL_ARGS -52 +#define CL_INVALID_WORK_DIMENSION -53 +#define CL_INVALID_WORK_GROUP_SIZE -54 +#define CL_INVALID_WORK_ITEM_SIZE -55 +#define CL_INVALID_GLOBAL_OFFSET -56 +#define CL_INVALID_EVENT_WAIT_LIST -57 +#define CL_INVALID_EVENT -58 +#define CL_INVALID_OPERATION -59 +#define CL_INVALID_GL_OBJECT -60 +#define CL_INVALID_BUFFER_SIZE -61 +#define CL_INVALID_MIP_LEVEL -62 +#define CL_INVALID_GLOBAL_WORK_SIZE -63 +#ifdef CL_VERSION_1_1 +#define CL_INVALID_PROPERTY -64 +#endif +#ifdef CL_VERSION_1_2 +#define CL_INVALID_IMAGE_DESCRIPTOR -65 +#define CL_INVALID_COMPILER_OPTIONS -66 +#define CL_INVALID_LINKER_OPTIONS -67 +#define CL_INVALID_DEVICE_PARTITION_COUNT -68 +#endif +#ifdef CL_VERSION_2_0 +#define CL_INVALID_PIPE_SIZE -69 +#define CL_INVALID_DEVICE_QUEUE -70 +#endif +#ifdef CL_VERSION_2_2 +#define CL_INVALID_SPEC_ID -71 +#define CL_MAX_SIZE_RESTRICTION_EXCEEDED -72 +#endif + + +/* cl_bool */ +#define CL_FALSE 0 +#define CL_TRUE 1 +#ifdef CL_VERSION_1_2 +#define CL_BLOCKING CL_TRUE +#define CL_NON_BLOCKING CL_FALSE +#endif + +/* cl_platform_info */ +#define CL_PLATFORM_PROFILE 0x0900 +#define CL_PLATFORM_VERSION 0x0901 +#define CL_PLATFORM_NAME 0x0902 +#define CL_PLATFORM_VENDOR 0x0903 +#define CL_PLATFORM_EXTENSIONS 0x0904 +#ifdef CL_VERSION_2_1 +#define CL_PLATFORM_HOST_TIMER_RESOLUTION 0x0905 +#endif +#ifdef CL_VERSION_3_0 +#define CL_PLATFORM_NUMERIC_VERSION 0x0906 +#define CL_PLATFORM_EXTENSIONS_WITH_VERSION 0x0907 +#endif + +/* cl_device_type - bitfield */ +#define CL_DEVICE_TYPE_DEFAULT (1 << 0) +#define CL_DEVICE_TYPE_CPU (1 << 1) +#define CL_DEVICE_TYPE_GPU (1 << 2) +#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_TYPE_CUSTOM (1 << 4) +#endif +#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF + +/* cl_device_info */ +#define CL_DEVICE_TYPE 0x1000 +#define CL_DEVICE_VENDOR_ID 0x1001 +#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 +#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 +#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B +#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C +#define CL_DEVICE_ADDRESS_BITS 0x100D +#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E +#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F +#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 +#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 +#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 +#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 +#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 +#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 +#define CL_DEVICE_IMAGE_SUPPORT 0x1016 +#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 +#define CL_DEVICE_MAX_SAMPLERS 0x1018 +#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 +#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A +#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B +#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C +#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D +#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E +#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F +#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 +#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 +#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 +#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 +#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 +#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 +#define CL_DEVICE_ENDIAN_LITTLE 0x1026 +#define CL_DEVICE_AVAILABLE 0x1027 +#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 +#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 +#define CL_DEVICE_QUEUE_PROPERTIES 0x102A /* deprecated */ +#ifdef CL_VERSION_2_0 +#define CL_DEVICE_QUEUE_ON_HOST_PROPERTIES 0x102A +#endif +#define CL_DEVICE_NAME 0x102B +#define CL_DEVICE_VENDOR 0x102C +#define CL_DRIVER_VERSION 0x102D +#define CL_DEVICE_PROFILE 0x102E +#define CL_DEVICE_VERSION 0x102F +#define CL_DEVICE_EXTENSIONS 0x1030 +#define CL_DEVICE_PLATFORM 0x1031 +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +#endif +/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG which is already defined in "cl_ext.h" */ +#ifdef CL_VERSION_1_1 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 +#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 /* deprecated */ +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C +#define CL_DEVICE_OPENCL_C_VERSION 0x103D +#endif +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_LINKER_AVAILABLE 0x103E +#define CL_DEVICE_BUILT_IN_KERNELS 0x103F +#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 +#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 +#define CL_DEVICE_PARENT_DEVICE 0x1042 +#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 +#define CL_DEVICE_PARTITION_PROPERTIES 0x1044 +#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 +#define CL_DEVICE_PARTITION_TYPE 0x1046 +#define CL_DEVICE_REFERENCE_COUNT 0x1047 +#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 +#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 +#endif +#ifdef CL_VERSION_2_0 +#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A +#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B +#define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS 0x104C +#define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE 0x104D +#define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES 0x104E +#define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE 0x104F +#define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE 0x1050 +#define CL_DEVICE_MAX_ON_DEVICE_QUEUES 0x1051 +#define CL_DEVICE_MAX_ON_DEVICE_EVENTS 0x1052 +#define CL_DEVICE_SVM_CAPABILITIES 0x1053 +#define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE 0x1054 +#define CL_DEVICE_MAX_PIPE_ARGS 0x1055 +#define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS 0x1056 +#define CL_DEVICE_PIPE_MAX_PACKET_SIZE 0x1057 +#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT 0x1058 +#define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT 0x1059 +#define CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT 0x105A +#endif +#ifdef CL_VERSION_2_1 +#define CL_DEVICE_IL_VERSION 0x105B +#define CL_DEVICE_MAX_NUM_SUB_GROUPS 0x105C +#define CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS 0x105D +#endif +#ifdef CL_VERSION_3_0 +#define CL_DEVICE_NUMERIC_VERSION 0x105E +#define CL_DEVICE_EXTENSIONS_WITH_VERSION 0x1060 +#define CL_DEVICE_ILS_WITH_VERSION 0x1061 +#define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION 0x1062 +#define CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES 0x1063 +#define CL_DEVICE_ATOMIC_FENCE_CAPABILITIES 0x1064 +#define CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT 0x1065 +#define CL_DEVICE_OPENCL_C_ALL_VERSIONS 0x1066 +#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x1067 +#define CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT 0x1068 +#define CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT 0x1069 +/* 0x106A to 0x106E - Reserved for upcoming KHR extension */ +#define CL_DEVICE_OPENCL_C_FEATURES 0x106F +#define CL_DEVICE_DEVICE_ENQUEUE_SUPPORT 0x1070 +#define CL_DEVICE_PIPE_SUPPORT 0x1071 +#endif + +/* cl_device_fp_config - bitfield */ +#define CL_FP_DENORM (1 << 0) +#define CL_FP_INF_NAN (1 << 1) +#define CL_FP_ROUND_TO_NEAREST (1 << 2) +#define CL_FP_ROUND_TO_ZERO (1 << 3) +#define CL_FP_ROUND_TO_INF (1 << 4) +#define CL_FP_FMA (1 << 5) +#ifdef CL_VERSION_1_1 +#define CL_FP_SOFT_FLOAT (1 << 6) +#endif +#ifdef CL_VERSION_1_2 +#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) +#endif + +/* cl_device_mem_cache_type */ +#define CL_NONE 0x0 +#define CL_READ_ONLY_CACHE 0x1 +#define CL_READ_WRITE_CACHE 0x2 + +/* cl_device_local_mem_type */ +#define CL_LOCAL 0x1 +#define CL_GLOBAL 0x2 + +/* cl_device_exec_capabilities - bitfield */ +#define CL_EXEC_KERNEL (1 << 0) +#define CL_EXEC_NATIVE_KERNEL (1 << 1) + +/* cl_command_queue_properties - bitfield */ +#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) +#define CL_QUEUE_PROFILING_ENABLE (1 << 1) +#ifdef CL_VERSION_2_0 +#define CL_QUEUE_ON_DEVICE (1 << 2) +#define CL_QUEUE_ON_DEVICE_DEFAULT (1 << 3) +#endif + +/* cl_context_info */ +#define CL_CONTEXT_REFERENCE_COUNT 0x1080 +#define CL_CONTEXT_DEVICES 0x1081 +#define CL_CONTEXT_PROPERTIES 0x1082 +#ifdef CL_VERSION_1_1 +#define CL_CONTEXT_NUM_DEVICES 0x1083 +#endif + +/* cl_context_properties */ +#define CL_CONTEXT_PLATFORM 0x1084 +#ifdef CL_VERSION_1_2 +#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_device_partition_property */ +#define CL_DEVICE_PARTITION_EQUALLY 0x1086 +#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 +#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_device_affinity_domain */ +#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) +#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) +#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) +#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) +#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) +#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) + +#endif + +#ifdef CL_VERSION_2_0 + +/* cl_device_svm_capabilities */ +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2) +#define CL_DEVICE_SVM_ATOMICS (1 << 3) + +#endif + +/* cl_command_queue_info */ +#define CL_QUEUE_CONTEXT 0x1090 +#define CL_QUEUE_DEVICE 0x1091 +#define CL_QUEUE_REFERENCE_COUNT 0x1092 +#define CL_QUEUE_PROPERTIES 0x1093 +#ifdef CL_VERSION_2_0 +#define CL_QUEUE_SIZE 0x1094 +#endif +#ifdef CL_VERSION_2_1 +#define CL_QUEUE_DEVICE_DEFAULT 0x1095 +#endif +#ifdef CL_VERSION_3_0 +#define CL_QUEUE_PROPERTIES_ARRAY 0x1098 +#endif + +/* cl_mem_flags and cl_svm_mem_flags - bitfield */ +#define CL_MEM_READ_WRITE (1 << 0) +#define CL_MEM_WRITE_ONLY (1 << 1) +#define CL_MEM_READ_ONLY (1 << 2) +#define CL_MEM_USE_HOST_PTR (1 << 3) +#define CL_MEM_ALLOC_HOST_PTR (1 << 4) +#define CL_MEM_COPY_HOST_PTR (1 << 5) +/* reserved (1 << 6) */ +#ifdef CL_VERSION_1_2 +#define CL_MEM_HOST_WRITE_ONLY (1 << 7) +#define CL_MEM_HOST_READ_ONLY (1 << 8) +#define CL_MEM_HOST_NO_ACCESS (1 << 9) +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10) /* used by cl_svm_mem_flags only */ +#define CL_MEM_SVM_ATOMICS (1 << 11) /* used by cl_svm_mem_flags only */ +#define CL_MEM_KERNEL_READ_AND_WRITE (1 << 12) +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_mem_migration_flags - bitfield */ +#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) +#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) + +#endif + +/* cl_channel_order */ +#define CL_R 0x10B0 +#define CL_A 0x10B1 +#define CL_RG 0x10B2 +#define CL_RA 0x10B3 +#define CL_RGB 0x10B4 +#define CL_RGBA 0x10B5 +#define CL_BGRA 0x10B6 +#define CL_ARGB 0x10B7 +#define CL_INTENSITY 0x10B8 +#define CL_LUMINANCE 0x10B9 +#ifdef CL_VERSION_1_1 +#define CL_Rx 0x10BA +#define CL_RGx 0x10BB +#define CL_RGBx 0x10BC +#endif +#ifdef CL_VERSION_1_2 +#define CL_DEPTH 0x10BD +#define CL_DEPTH_STENCIL 0x10BE +#endif +#ifdef CL_VERSION_2_0 +#define CL_sRGB 0x10BF +#define CL_sRGBx 0x10C0 +#define CL_sRGBA 0x10C1 +#define CL_sBGRA 0x10C2 +#define CL_ABGR 0x10C3 +#endif + +/* cl_channel_type */ +#define CL_SNORM_INT8 0x10D0 +#define CL_SNORM_INT16 0x10D1 +#define CL_UNORM_INT8 0x10D2 +#define CL_UNORM_INT16 0x10D3 +#define CL_UNORM_SHORT_565 0x10D4 +#define CL_UNORM_SHORT_555 0x10D5 +#define CL_UNORM_INT_101010 0x10D6 +#define CL_SIGNED_INT8 0x10D7 +#define CL_SIGNED_INT16 0x10D8 +#define CL_SIGNED_INT32 0x10D9 +#define CL_UNSIGNED_INT8 0x10DA +#define CL_UNSIGNED_INT16 0x10DB +#define CL_UNSIGNED_INT32 0x10DC +#define CL_HALF_FLOAT 0x10DD +#define CL_FLOAT 0x10DE +#ifdef CL_VERSION_1_2 +#define CL_UNORM_INT24 0x10DF +#endif +#ifdef CL_VERSION_2_1 +#define CL_UNORM_INT_101010_2 0x10E0 +#endif + +/* cl_mem_object_type */ +#define CL_MEM_OBJECT_BUFFER 0x10F0 +#define CL_MEM_OBJECT_IMAGE2D 0x10F1 +#define CL_MEM_OBJECT_IMAGE3D 0x10F2 +#ifdef CL_VERSION_1_2 +#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 +#define CL_MEM_OBJECT_IMAGE1D 0x10F4 +#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 +#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_OBJECT_PIPE 0x10F7 +#endif + +/* cl_mem_info */ +#define CL_MEM_TYPE 0x1100 +#define CL_MEM_FLAGS 0x1101 +#define CL_MEM_SIZE 0x1102 +#define CL_MEM_HOST_PTR 0x1103 +#define CL_MEM_MAP_COUNT 0x1104 +#define CL_MEM_REFERENCE_COUNT 0x1105 +#define CL_MEM_CONTEXT 0x1106 +#ifdef CL_VERSION_1_1 +#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 +#define CL_MEM_OFFSET 0x1108 +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_USES_SVM_POINTER 0x1109 +#endif +#ifdef CL_VERSION_3_0 +#define CL_MEM_PROPERTIES 0x110A +#endif + +/* cl_image_info */ +#define CL_IMAGE_FORMAT 0x1110 +#define CL_IMAGE_ELEMENT_SIZE 0x1111 +#define CL_IMAGE_ROW_PITCH 0x1112 +#define CL_IMAGE_SLICE_PITCH 0x1113 +#define CL_IMAGE_WIDTH 0x1114 +#define CL_IMAGE_HEIGHT 0x1115 +#define CL_IMAGE_DEPTH 0x1116 +#ifdef CL_VERSION_1_2 +#define CL_IMAGE_ARRAY_SIZE 0x1117 +#define CL_IMAGE_BUFFER 0x1118 +#define CL_IMAGE_NUM_MIP_LEVELS 0x1119 +#define CL_IMAGE_NUM_SAMPLES 0x111A +#endif + + +/* cl_pipe_info */ +#ifdef CL_VERSION_2_0 +#define CL_PIPE_PACKET_SIZE 0x1120 +#define CL_PIPE_MAX_PACKETS 0x1121 +#endif +#ifdef CL_VERSION_3_0 +#define CL_PIPE_PROPERTIES 0x1122 +#endif + +/* cl_addressing_mode */ +#define CL_ADDRESS_NONE 0x1130 +#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 +#define CL_ADDRESS_CLAMP 0x1132 +#define CL_ADDRESS_REPEAT 0x1133 +#ifdef CL_VERSION_1_1 +#define CL_ADDRESS_MIRRORED_REPEAT 0x1134 +#endif + +/* cl_filter_mode */ +#define CL_FILTER_NEAREST 0x1140 +#define CL_FILTER_LINEAR 0x1141 + +/* cl_sampler_info */ +#define CL_SAMPLER_REFERENCE_COUNT 0x1150 +#define CL_SAMPLER_CONTEXT 0x1151 +#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 +#define CL_SAMPLER_ADDRESSING_MODE 0x1153 +#define CL_SAMPLER_FILTER_MODE 0x1154 +#ifdef CL_VERSION_2_0 +/* These enumerants are for the cl_khr_mipmap_image extension. + They have since been added to cl_ext.h with an appropriate + KHR suffix, but are left here for backwards compatibility. */ +#define CL_SAMPLER_MIP_FILTER_MODE 0x1155 +#define CL_SAMPLER_LOD_MIN 0x1156 +#define CL_SAMPLER_LOD_MAX 0x1157 +#endif +#ifdef CL_VERSION_3_0 +#define CL_SAMPLER_PROPERTIES 0x1158 +#endif + +/* cl_map_flags - bitfield */ +#define CL_MAP_READ (1 << 0) +#define CL_MAP_WRITE (1 << 1) +#ifdef CL_VERSION_1_2 +#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) +#endif + +/* cl_program_info */ +#define CL_PROGRAM_REFERENCE_COUNT 0x1160 +#define CL_PROGRAM_CONTEXT 0x1161 +#define CL_PROGRAM_NUM_DEVICES 0x1162 +#define CL_PROGRAM_DEVICES 0x1163 +#define CL_PROGRAM_SOURCE 0x1164 +#define CL_PROGRAM_BINARY_SIZES 0x1165 +#define CL_PROGRAM_BINARIES 0x1166 +#ifdef CL_VERSION_1_2 +#define CL_PROGRAM_NUM_KERNELS 0x1167 +#define CL_PROGRAM_KERNEL_NAMES 0x1168 +#endif +#ifdef CL_VERSION_2_1 +#define CL_PROGRAM_IL 0x1169 +#endif +#ifdef CL_VERSION_2_2 +#define CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT 0x116A +#define CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT 0x116B +#endif + +/* cl_program_build_info */ +#define CL_PROGRAM_BUILD_STATUS 0x1181 +#define CL_PROGRAM_BUILD_OPTIONS 0x1182 +#define CL_PROGRAM_BUILD_LOG 0x1183 +#ifdef CL_VERSION_1_2 +#define CL_PROGRAM_BINARY_TYPE 0x1184 +#endif +#ifdef CL_VERSION_2_0 +#define CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE 0x1185 +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_program_binary_type */ +#define CL_PROGRAM_BINARY_TYPE_NONE 0x0 +#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 +#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 +#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 + +#endif + +/* cl_build_status */ +#define CL_BUILD_SUCCESS 0 +#define CL_BUILD_NONE -1 +#define CL_BUILD_ERROR -2 +#define CL_BUILD_IN_PROGRESS -3 + +/* cl_kernel_info */ +#define CL_KERNEL_FUNCTION_NAME 0x1190 +#define CL_KERNEL_NUM_ARGS 0x1191 +#define CL_KERNEL_REFERENCE_COUNT 0x1192 +#define CL_KERNEL_CONTEXT 0x1193 +#define CL_KERNEL_PROGRAM 0x1194 +#ifdef CL_VERSION_1_2 +#define CL_KERNEL_ATTRIBUTES 0x1195 +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_info */ +#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 +#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 +#define CL_KERNEL_ARG_TYPE_NAME 0x1198 +#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 +#define CL_KERNEL_ARG_NAME 0x119A + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_address_qualifier */ +#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B +#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C +#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D +#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_access_qualifier */ +#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 +#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 +#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 +#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_type_qualifier */ +#define CL_KERNEL_ARG_TYPE_NONE 0 +#define CL_KERNEL_ARG_TYPE_CONST (1 << 0) +#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) +#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) +#ifdef CL_VERSION_2_0 +#define CL_KERNEL_ARG_TYPE_PIPE (1 << 3) +#endif + +#endif + +/* cl_kernel_work_group_info */ +#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 +#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 +#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 +#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 +#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 +#ifdef CL_VERSION_1_2 +#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 +#endif + +#ifdef CL_VERSION_2_1 + +/* cl_kernel_sub_group_info */ +#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE 0x2033 +#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE 0x2034 +#define CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT 0x11B8 +#define CL_KERNEL_MAX_NUM_SUB_GROUPS 0x11B9 +#define CL_KERNEL_COMPILE_NUM_SUB_GROUPS 0x11BA + +#endif + +#ifdef CL_VERSION_2_0 + +/* cl_kernel_exec_info */ +#define CL_KERNEL_EXEC_INFO_SVM_PTRS 0x11B6 +#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM 0x11B7 + +#endif + +/* cl_event_info */ +#define CL_EVENT_COMMAND_QUEUE 0x11D0 +#define CL_EVENT_COMMAND_TYPE 0x11D1 +#define CL_EVENT_REFERENCE_COUNT 0x11D2 +#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 +#ifdef CL_VERSION_1_1 +#define CL_EVENT_CONTEXT 0x11D4 +#endif + +/* cl_command_type */ +#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 +#define CL_COMMAND_TASK 0x11F1 +#define CL_COMMAND_NATIVE_KERNEL 0x11F2 +#define CL_COMMAND_READ_BUFFER 0x11F3 +#define CL_COMMAND_WRITE_BUFFER 0x11F4 +#define CL_COMMAND_COPY_BUFFER 0x11F5 +#define CL_COMMAND_READ_IMAGE 0x11F6 +#define CL_COMMAND_WRITE_IMAGE 0x11F7 +#define CL_COMMAND_COPY_IMAGE 0x11F8 +#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 +#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA +#define CL_COMMAND_MAP_BUFFER 0x11FB +#define CL_COMMAND_MAP_IMAGE 0x11FC +#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD +#define CL_COMMAND_MARKER 0x11FE +#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF +#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 +#ifdef CL_VERSION_1_1 +#define CL_COMMAND_READ_BUFFER_RECT 0x1201 +#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 +#define CL_COMMAND_COPY_BUFFER_RECT 0x1203 +#define CL_COMMAND_USER 0x1204 +#endif +#ifdef CL_VERSION_1_2 +#define CL_COMMAND_BARRIER 0x1205 +#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 +#define CL_COMMAND_FILL_BUFFER 0x1207 +#define CL_COMMAND_FILL_IMAGE 0x1208 +#endif +#ifdef CL_VERSION_2_0 +#define CL_COMMAND_SVM_FREE 0x1209 +#define CL_COMMAND_SVM_MEMCPY 0x120A +#define CL_COMMAND_SVM_MEMFILL 0x120B +#define CL_COMMAND_SVM_MAP 0x120C +#define CL_COMMAND_SVM_UNMAP 0x120D +#endif +#ifdef CL_VERSION_3_0 +#define CL_COMMAND_SVM_MIGRATE_MEM 0x120E +#endif + +/* command execution status */ +#define CL_COMPLETE 0x0 +#define CL_RUNNING 0x1 +#define CL_SUBMITTED 0x2 +#define CL_QUEUED 0x3 + +/* cl_buffer_create_type */ +#ifdef CL_VERSION_1_1 +#define CL_BUFFER_CREATE_TYPE_REGION 0x1220 +#endif + +/* cl_profiling_info */ +#define CL_PROFILING_COMMAND_QUEUED 0x1280 +#define CL_PROFILING_COMMAND_SUBMIT 0x1281 +#define CL_PROFILING_COMMAND_START 0x1282 +#define CL_PROFILING_COMMAND_END 0x1283 +#ifdef CL_VERSION_2_0 +#define CL_PROFILING_COMMAND_COMPLETE 0x1284 +#endif + +/* cl_device_atomic_capabilities - bitfield */ +#ifdef CL_VERSION_3_0 +#define CL_DEVICE_ATOMIC_ORDER_RELAXED (1 << 0) +#define CL_DEVICE_ATOMIC_ORDER_ACQ_REL (1 << 1) +#define CL_DEVICE_ATOMIC_ORDER_SEQ_CST (1 << 2) +#define CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM (1 << 3) +#define CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP (1 << 4) +#define CL_DEVICE_ATOMIC_SCOPE_DEVICE (1 << 5) +#define CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES (1 << 6) +#endif + +/* cl_khronos_vendor_id */ +#define CL_KHRONOS_VENDOR_ID_CODEPLAY 0x10004 + +#ifdef CL_VERSION_3_0 + +/* cl_version */ +#define CL_VERSION_MAJOR_BITS (10) +#define CL_VERSION_MINOR_BITS (10) +#define CL_VERSION_PATCH_BITS (12) + +#define CL_VERSION_MAJOR_MASK ((1 << CL_VERSION_MAJOR_BITS) - 1) +#define CL_VERSION_MINOR_MASK ((1 << CL_VERSION_MINOR_BITS) - 1) +#define CL_VERSION_PATCH_MASK ((1 << CL_VERSION_PATCH_BITS) - 1) + +#define CL_VERSION_MAJOR(version) \ + ((version) >> (CL_VERSION_MINOR_BITS + CL_VERSION_PATCH_BITS)) + +#define CL_VERSION_MINOR(version) \ + (((version) >> CL_VERSION_PATCH_BITS) & CL_VERSION_MINOR_MASK) + +#define CL_VERSION_PATCH(version) ((version) & CL_VERSION_PATCH_MASK) + +#define CL_MAKE_VERSION(major, minor, patch) \ + ((((major) & CL_VERSION_MAJOR_MASK) \ + << (CL_VERSION_MINOR_BITS + CL_VERSION_PATCH_BITS)) | \ + (((minor) & CL_VERSION_MINOR_MASK) << CL_VERSION_PATCH_BITS) | \ + ((patch) & CL_VERSION_PATCH_MASK)) + +#endif + +/********************************************************************************************************/ + +/* Platform API */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformIDs(cl_uint num_entries, + cl_platform_id * platforms, + cl_uint * num_platforms) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformInfo(cl_platform_id platform, + cl_platform_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Device APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDs(cl_platform_id platform, + cl_device_type device_type, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceInfo(cl_device_id device, + cl_device_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateSubDevices(cl_device_id in_device, + const cl_device_partition_property * properties, + cl_uint num_devices, + cl_device_id * out_devices, + cl_uint * num_devices_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetDefaultDeviceCommandQueue(cl_context context, + cl_device_id device, + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceAndHostTimer(cl_device_id device, + cl_ulong* device_timestamp, + cl_ulong* host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetHostTimer(cl_device_id device, + cl_ulong * host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +#endif + +/* Context APIs */ +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContext(const cl_context_properties * properties, + cl_uint num_devices, + const cl_device_id * devices, + void (CL_CALLBACK * pfn_notify)(const char * errinfo, + const void * private_info, + size_t cb, + void * user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContextFromType(const cl_context_properties * properties, + cl_device_type device_type, + void (CL_CALLBACK * pfn_notify)(const char * errinfo, + const void * private_info, + size_t cb, + void * user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainContext(cl_context context) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseContext(cl_context context) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetContextInfo(cl_context context, + cl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Command Queue APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_command_queue CL_API_CALL +clCreateCommandQueueWithProperties(cl_context context, + cl_device_id device, + const cl_queue_properties * properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetCommandQueueInfo(cl_command_queue command_queue, + cl_command_queue_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Memory Object APIs */ +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBuffer(cl_context context, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateSubBuffer(cl_mem buffer, + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void * buffer_create_info, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +#endif + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateImage(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + const cl_image_desc * image_desc, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreatePipe(cl_context context, + cl_mem_flags flags, + cl_uint pipe_packet_size, + cl_uint pipe_max_packets, + const cl_pipe_properties * properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +#ifdef CL_VERSION_3_0 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBufferWithProperties(cl_context context, + const cl_mem_properties * properties, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_3_0; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateImageWithProperties(cl_context context, + const cl_mem_properties * properties, + cl_mem_flags flags, + const cl_image_format * image_format, + const cl_image_desc * image_desc, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_3_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedImageFormats(cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_image_format * image_formats, + cl_uint * num_image_formats) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMemObjectInfo(cl_mem memobj, + cl_mem_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetImageInfo(cl_mem image, + cl_image_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPipeInfo(cl_mem pipe, + cl_pipe_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetMemObjectDestructorCallback(cl_mem memobj, + void (CL_CALLBACK * pfn_notify)(cl_mem memobj, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_1; + +#endif + +/* SVM Allocation APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY void * CL_API_CALL +clSVMAlloc(cl_context context, + cl_svm_mem_flags flags, + size_t size, + cl_uint alignment) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY void CL_API_CALL +clSVMFree(cl_context context, + void * svm_pointer) CL_API_SUFFIX__VERSION_2_0; + +#endif + +/* Sampler APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_sampler CL_API_CALL +clCreateSamplerWithProperties(cl_context context, + const cl_sampler_properties * sampler_properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSamplerInfo(cl_sampler sampler, + cl_sampler_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Program Object APIs */ +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithSource(cl_context context, + cl_uint count, + const char ** strings, + const size_t * lengths, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBinary(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const size_t * lengths, + const unsigned char ** binaries, + cl_int * binary_status, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBuiltInKernels(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const char * kernel_names, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithIL(cl_context context, + const void* il, + size_t length, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clBuildProgram(cl_program program, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clCompileProgram(cl_program program, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + cl_uint num_input_headers, + const cl_program * input_headers, + const char ** header_include_names, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_program CL_API_CALL +clLinkProgram(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + cl_uint num_input_programs, + const cl_program * input_programs, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetProgramReleaseCallback(cl_program program, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_2_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetProgramSpecializationConstant(cl_program program, + cl_uint spec_id, + size_t spec_size, + const void* spec_value) CL_API_SUFFIX__VERSION_2_2; + +#endif + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clUnloadPlatformCompiler(cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramInfo(cl_program program, + cl_program_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramBuildInfo(cl_program program, + cl_device_id device, + cl_program_build_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Kernel Object APIs */ +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCreateKernel(cl_program program, + const char * kernel_name, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateKernelsInProgram(cl_program program, + cl_uint num_kernels, + cl_kernel * kernels, + cl_uint * num_kernels_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCloneKernel(cl_kernel source_kernel, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArg(cl_kernel kernel, + cl_uint arg_index, + size_t arg_size, + const void * arg_value) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArgSVMPointer(cl_kernel kernel, + cl_uint arg_index, + const void * arg_value) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelExecInfo(cl_kernel kernel, + cl_kernel_exec_info param_name, + size_t param_value_size, + const void * param_value) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelInfo(cl_kernel kernel, + cl_kernel_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelArgInfo(cl_kernel kernel, + cl_uint arg_indx, + cl_kernel_arg_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelWorkGroupInfo(cl_kernel kernel, + cl_device_id device, + cl_kernel_work_group_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelSubGroupInfo(cl_kernel kernel, + cl_device_id device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void* input_value, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +/* Event Object APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clWaitForEvents(cl_uint num_events, + const cl_event * event_list) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventInfo(cl_event event, + cl_event_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateUserEvent(cl_context context, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetUserEventStatus(cl_event event, + cl_int execution_status) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetEventCallback(cl_event event, + cl_int command_exec_callback_type, + void (CL_CALLBACK * pfn_notify)(cl_event event, + cl_int event_command_status, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_1; + +#endif + +/* Profiling APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventProfilingInfo(cl_event event, + cl_profiling_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Flush and Finish APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clFlush(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clFinish(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +/* Enqueued Commands APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + size_t offset, + size_t size, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBufferRect(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + const size_t * buffer_offset, + const size_t * host_offset, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + size_t offset, + size_t size, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBufferRect(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + const size_t * buffer_offset, + const size_t * host_offset, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillBuffer(cl_command_queue command_queue, + cl_mem buffer, + const void * pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBuffer(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferRect(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + const size_t * src_origin, + const size_t * dst_origin, + const size_t * region, + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_read, + const size_t * origin, + const size_t * region, + size_t row_pitch, + size_t slice_pitch, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_write, + const size_t * origin, + const size_t * region, + size_t input_row_pitch, + size_t input_slice_pitch, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillImage(cl_command_queue command_queue, + cl_mem image, + const void * fill_color, + const size_t * origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImage(cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_image, + const size_t * src_origin, + const size_t * dst_origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImageToBuffer(cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_buffer, + const size_t * src_origin, + const size_t * region, + size_t dst_offset, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferToImage(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_image, + size_t src_offset, + const size_t * dst_origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_map, + cl_map_flags map_flags, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_map, + cl_map_flags map_flags, + const size_t * origin, + const size_t * region, + size_t * image_row_pitch, + size_t * image_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueUnmapMemObject(cl_command_queue command_queue, + cl_mem memobj, + void * mapped_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemObjects(cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem * mem_objects, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNDRangeKernel(cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t * global_work_offset, + const size_t * global_work_size, + const size_t * local_work_size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNativeKernel(cl_command_queue command_queue, + void (CL_CALLBACK * user_func)(void *), + void * args, + size_t cb_args, + cl_uint num_mem_objects, + const cl_mem * mem_list, + const void ** args_mem_loc, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMarkerWithWaitList(cl_command_queue command_queue, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueBarrierWithWaitList(cl_command_queue command_queue, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMFree(cl_command_queue command_queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void * user_data), + void * user_data, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemcpy(cl_command_queue command_queue, + cl_bool blocking_copy, + void * dst_ptr, + const void * src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemFill(cl_command_queue command_queue, + void * svm_ptr, + const void * pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMap(cl_command_queue command_queue, + cl_bool blocking_map, + cl_map_flags flags, + void * svm_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMUnmap(cl_command_queue command_queue, + void * svm_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMigrateMem(cl_command_queue command_queue, + cl_uint num_svm_pointers, + const void ** svm_pointers, + const size_t * sizes, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_1; + +#endif + +#ifdef CL_VERSION_1_2 + +/* Extension function access + * + * Returns the extension function address for the given function name, + * or NULL if a valid function can not be found. The client must + * check to make sure the address is not NULL, before using or + * calling the returned function address. + */ +extern CL_API_ENTRY void * CL_API_CALL +clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, + const char * func_name) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + /* + * WARNING: + * This API introduces mutable state into the OpenCL implementation. It has been REMOVED + * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the + * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably. + * It is likely to be non-performant. Use of this API is not advised. Use at your own risk. + * + * Software developers previously relying on this API are instructed to set the command queue + * properties when creating the queue, instead. + */ + extern CL_API_ENTRY cl_int CL_API_CALL + clSetCommandQueueProperty(cl_command_queue command_queue, + cl_command_queue_properties properties, + cl_bool enable, + cl_command_queue_properties * old_properties) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; +#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ + +/* Deprecated OpenCL 1.1 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage2D(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + size_t image_width, + size_t image_height, + size_t image_row_pitch, + void * host_ptr, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage3D(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + size_t image_width, + size_t image_height, + size_t image_depth, + size_t image_row_pitch, + size_t image_slice_pitch, + void * host_ptr, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueMarker(cl_command_queue command_queue, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueWaitForEvents(cl_command_queue command_queue, + cl_uint num_events, + const cl_event * event_list) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueBarrier(cl_command_queue command_queue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL +clGetExtensionFunctionAddress(const char * func_name) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +/* Deprecated OpenCL 2.0 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL +clCreateCommandQueue(cl_context context, + cl_device_id device, + cl_command_queue_properties properties, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL +clCreateSampler(cl_context context, + cl_bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int CL_API_CALL +clEnqueueTask(cl_command_queue command_queue, + cl_kernel kernel, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_H */ diff --git a/CL/cl_d3d10.h b/CL/cl_d3d10.h new file mode 100644 index 0000000..cda5469 --- /dev/null +++ b/CL/cl_d3d10.h @@ -0,0 +1,117 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __OPENCL_CL_D3D10_H +#define __OPENCL_CL_D3D10_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/****************************************************************************** + * cl_khr_d3d10_sharing */ +#define cl_khr_d3d10_sharing 1 + +typedef cl_uint cl_d3d10_device_source_khr; +typedef cl_uint cl_d3d10_device_set_khr; + +/******************************************************************************/ + +/* Error Codes */ +#define CL_INVALID_D3D10_DEVICE_KHR -1002 +#define CL_INVALID_D3D10_RESOURCE_KHR -1003 +#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004 +#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005 + +/* cl_d3d10_device_source_nv */ +#define CL_D3D10_DEVICE_KHR 0x4010 +#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011 + +/* cl_d3d10_device_set_nv */ +#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012 +#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013 + +/* cl_context_info */ +#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014 +#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C + +/* cl_mem_info */ +#define CL_MEM_D3D10_RESOURCE_KHR 0x4015 + +/* cl_image_info */ +#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016 + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017 +#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018 + +/******************************************************************************/ + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)( + cl_platform_id platform, + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D10Buffer * resource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D10Texture2D * resource, + UINT subresource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D10Texture3D * resource, + UINT subresource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_D3D10_H */ + diff --git a/CL/cl_d3d11.h b/CL/cl_d3d11.h new file mode 100644 index 0000000..6b7e2e9 --- /dev/null +++ b/CL/cl_d3d11.h @@ -0,0 +1,117 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __OPENCL_CL_D3D11_H +#define __OPENCL_CL_D3D11_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/****************************************************************************** + * cl_khr_d3d11_sharing */ +#define cl_khr_d3d11_sharing 1 + +typedef cl_uint cl_d3d11_device_source_khr; +typedef cl_uint cl_d3d11_device_set_khr; + +/******************************************************************************/ + +/* Error Codes */ +#define CL_INVALID_D3D11_DEVICE_KHR -1006 +#define CL_INVALID_D3D11_RESOURCE_KHR -1007 +#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008 +#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009 + +/* cl_d3d11_device_source */ +#define CL_D3D11_DEVICE_KHR 0x4019 +#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A + +/* cl_d3d11_device_set */ +#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B +#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C + +/* cl_context_info */ +#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D +#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D + +/* cl_mem_info */ +#define CL_MEM_D3D11_RESOURCE_KHR 0x401E + +/* cl_image_info */ +#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020 +#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021 + +/******************************************************************************/ + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)( + cl_platform_id platform, + cl_d3d11_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d11_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D11Buffer * resource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D11Texture2D * resource, + UINT subresource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)( + cl_context context, + cl_mem_flags flags, + ID3D11Texture3D * resource, + UINT subresource, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_D3D11_H */ + diff --git a/CL/cl_dx9_media_sharing.h b/CL/cl_dx9_media_sharing.h new file mode 100644 index 0000000..0489370 --- /dev/null +++ b/CL/cl_dx9_media_sharing.h @@ -0,0 +1,118 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H +#define __OPENCL_CL_DX9_MEDIA_SHARING_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/******************************************************************************/ +/* cl_khr_dx9_media_sharing */ +#define cl_khr_dx9_media_sharing 1 + +typedef cl_uint cl_dx9_media_adapter_type_khr; +typedef cl_uint cl_dx9_media_adapter_set_khr; + +#if defined(_WIN32) +#include +typedef struct _cl_dx9_surface_info_khr +{ + IDirect3DSurface9 *resource; + HANDLE shared_handle; +} cl_dx9_surface_info_khr; +#endif + + +/******************************************************************************/ + +/* Error Codes */ +#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010 +#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011 +#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012 +#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013 + +/* cl_media_adapter_type_khr */ +#define CL_ADAPTER_D3D9_KHR 0x2020 +#define CL_ADAPTER_D3D9EX_KHR 0x2021 +#define CL_ADAPTER_DXVA_KHR 0x2022 + +/* cl_media_adapter_set_khr */ +#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023 +#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024 + +/* cl_context_info */ +#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025 +#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026 +#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027 + +/* cl_mem_info */ +#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028 +#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029 + +/* cl_image_info */ +#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B +#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C + +/******************************************************************************/ + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)( + cl_platform_id platform, + cl_uint num_media_adapters, + cl_dx9_media_adapter_type_khr * media_adapter_type, + void * media_adapters, + cl_dx9_media_adapter_set_khr media_adapter_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)( + cl_context context, + cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, + void * surface_info, + cl_uint plane, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */ + diff --git a/CL/cl_dx9_media_sharing_intel.h b/CL/cl_dx9_media_sharing_intel.h new file mode 100644 index 0000000..4525a17 --- /dev/null +++ b/CL/cl_dx9_media_sharing_intel.h @@ -0,0 +1,170 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +/*****************************************************************************\ + +Copyright (c) 2013-2019 Intel Corporation All Rights Reserved. + +THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE +MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +File Name: cl_dx9_media_sharing_intel.h + +Abstract: + +Notes: + +\*****************************************************************************/ + +#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H +#define __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/*************************************** +* cl_intel_dx9_media_sharing extension * +****************************************/ + +#define cl_intel_dx9_media_sharing 1 + +typedef cl_uint cl_dx9_device_source_intel; +typedef cl_uint cl_dx9_device_set_intel; + +/* error codes */ +#define CL_INVALID_DX9_DEVICE_INTEL -1010 +#define CL_INVALID_DX9_RESOURCE_INTEL -1011 +#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012 +#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013 + +/* cl_dx9_device_source_intel */ +#define CL_D3D9_DEVICE_INTEL 0x4022 +#define CL_D3D9EX_DEVICE_INTEL 0x4070 +#define CL_DXVA_DEVICE_INTEL 0x4071 + +/* cl_dx9_device_set_intel */ +#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024 +#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025 + +/* cl_context_info */ +#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026 +#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072 +#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073 + +/* cl_mem_info */ +#define CL_MEM_DX9_RESOURCE_INTEL 0x4027 +#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074 + +/* cl_image_info */ +#define CL_IMAGE_DX9_PLANE_INTEL 0x4075 + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A +#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B +/******************************************************************************/ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDsFromDX9INTEL( + cl_platform_id platform, + cl_dx9_device_source_intel dx9_device_source, + void* dx9_object, + cl_dx9_device_set_intel dx9_device_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)( + cl_platform_id platform, + cl_dx9_device_source_intel dx9_device_source, + void* dx9_object, + cl_dx9_device_set_intel dx9_device_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromDX9MediaSurfaceINTEL( + cl_context context, + cl_mem_flags flags, + IDirect3DSurface9* resource, + HANDLE sharedHandle, + UINT plane, + cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)( + cl_context context, + cl_mem_flags flags, + IDirect3DSurface9* resource, + HANDLE sharedHandle, + UINT plane, + cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireDX9ObjectsINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseDX9ObjectsINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H */ + diff --git a/CL/cl_egl.h b/CL/cl_egl.h new file mode 100644 index 0000000..c8bde80 --- /dev/null +++ b/CL/cl_egl.h @@ -0,0 +1,120 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __OPENCL_CL_EGL_H +#define __OPENCL_CL_EGL_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */ +#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F +#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D +#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E + +/* Error type for clCreateFromEGLImageKHR */ +#define CL_INVALID_EGL_OBJECT_KHR -1093 +#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092 + +/* CLeglImageKHR is an opaque handle to an EGLImage */ +typedef void* CLeglImageKHR; + +/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */ +typedef void* CLeglDisplayKHR; + +/* CLeglSyncKHR is an opaque handle to an EGLSync object */ +typedef void* CLeglSyncKHR; + +/* properties passed to clCreateFromEGLImageKHR */ +typedef intptr_t cl_egl_image_properties_khr; + + +#define cl_khr_egl_image 1 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromEGLImageKHR(cl_context context, + CLeglDisplayKHR egldisplay, + CLeglImageKHR eglimage, + cl_mem_flags flags, + const cl_egl_image_properties_khr * properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)( + cl_context context, + CLeglDisplayKHR egldisplay, + CLeglImageKHR eglimage, + cl_mem_flags flags, + const cl_egl_image_properties_khr * properties, + cl_int * errcode_ret); + + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + + +#define cl_khr_egl_event 1 + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateEventFromEGLSyncKHR(cl_context context, + CLeglSyncKHR sync, + CLeglDisplayKHR display, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)( + cl_context context, + CLeglSyncKHR sync, + CLeglDisplayKHR display, + cl_int * errcode_ret); + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_EGL_H */ diff --git a/CL/cl_ext.h b/CL/cl_ext.h new file mode 100644 index 0000000..cd86843 --- /dev/null +++ b/CL/cl_ext.h @@ -0,0 +1,841 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +/* cl_ext.h contains OpenCL extensions which don't have external */ +/* (OpenGL, D3D) dependencies. */ + +#ifndef __CL_EXT_H +#define __CL_EXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* cl_khr_fp64 extension - no extension #define since it has no functions */ +/* CL_DEVICE_DOUBLE_FP_CONFIG is defined in CL.h for OpenCL >= 120 */ + +#if CL_TARGET_OPENCL_VERSION <= 110 +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +#endif + +/* cl_khr_fp16 extension - no extension #define since it has no functions */ +#define CL_DEVICE_HALF_FP_CONFIG 0x1033 + +/* Memory object destruction + * + * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR + * + * Registers a user callback function that will be called when the memory object is deleted and its resources + * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback + * stack associated with memobj. The registered user callback functions are called in the reverse order in + * which they were registered. The user callback functions are called and then the memory object is deleted + * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be + * notified when the memory referenced by host_ptr, specified when the memory object is created and used as + * the storage bits for the memory object, can be reused or freed. + * + * The application may not call CL api's with the cl_mem object passed to the pfn_notify. + * + * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) + * before using. + */ +#define cl_APPLE_SetMemObjectDestructor 1 +cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem memobj, + void (* pfn_notify)(cl_mem memobj, void * user_data), + void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + + +/* Context Logging Functions + * + * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext(). + * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) + * before using. + * + * clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger + */ +#define cl_APPLE_ContextLoggingFunctions 1 +extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * errstr, + const void * private_info, + size_t cb, + void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + +/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */ +extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * errstr, + const void * private_info, + size_t cb, + void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + +/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */ +extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * errstr, + const void * private_info, + size_t cb, + void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + + +/************************ +* cl_khr_icd extension * +************************/ +#define cl_khr_icd 1 + +/* cl_platform_info */ +#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 + +/* Additional Error Codes */ +#define CL_PLATFORM_NOT_FOUND_KHR -1001 + +extern CL_API_ENTRY cl_int CL_API_CALL +clIcdGetPlatformIDsKHR(cl_uint num_entries, + cl_platform_id * platforms, + cl_uint * num_platforms); + +typedef CL_API_ENTRY cl_int +(CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(cl_uint num_entries, + cl_platform_id * platforms, + cl_uint * num_platforms); + + +/******************************* + * cl_khr_il_program extension * + *******************************/ +#define cl_khr_il_program 1 + +/* New property to clGetDeviceInfo for retrieving supported intermediate + * languages + */ +#define CL_DEVICE_IL_VERSION_KHR 0x105B + +/* New property to clGetProgramInfo for retrieving for retrieving the IL of a + * program + */ +#define CL_PROGRAM_IL_KHR 0x1169 + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithILKHR(cl_context context, + const void * il, + size_t length, + cl_int * errcode_ret); + +typedef CL_API_ENTRY cl_program +(CL_API_CALL *clCreateProgramWithILKHR_fn)(cl_context context, + const void * il, + size_t length, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + +/* Extension: cl_khr_image2d_from_buffer + * + * This extension allows a 2D image to be created from a cl_mem buffer without + * a copy. The type associated with a 2D image created from a buffer in an + * OpenCL program is image2d_t. Both the sampler and sampler-less read_image + * built-in functions are supported for 2D images and 2D images created from + * a buffer. Similarly, the write_image built-ins are also supported for 2D + * images created from a buffer. + * + * When the 2D image from buffer is created, the client must specify the + * width, height, image format (i.e. channel order and channel data type) + * and optionally the row pitch. + * + * The pitch specified must be a multiple of + * CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR pixels. + * The base address of the buffer must be aligned to + * CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR pixels. + */ + +#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR 0x104A +#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR 0x104B + + +/************************************** + * cl_khr_initialize_memory extension * + **************************************/ + +#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030 + + +/************************************** + * cl_khr_terminate_context extension * + **************************************/ + +#define CL_CONTEXT_TERMINATED_KHR -1121 + +#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031 +#define CL_CONTEXT_TERMINATE_KHR 0x2032 + +#define cl_khr_terminate_context 1 +extern CL_API_ENTRY cl_int CL_API_CALL +clTerminateContextKHR(cl_context context) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_EXT_SUFFIX__VERSION_1_2; + + +/* + * Extension: cl_khr_spir + * + * This extension adds support to create an OpenCL program object from a + * Standard Portable Intermediate Representation (SPIR) instance + */ + +#define CL_DEVICE_SPIR_VERSIONS 0x40E0 +#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1 + + +/***************************************** + * cl_khr_create_command_queue extension * + *****************************************/ +#define cl_khr_create_command_queue 1 + +typedef cl_bitfield cl_queue_properties_khr; + +extern CL_API_ENTRY cl_command_queue CL_API_CALL +clCreateCommandQueueWithPropertiesKHR(cl_context context, + cl_device_id device, + const cl_queue_properties_khr* properties, + cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_command_queue +(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(cl_context context, + cl_device_id device, + const cl_queue_properties_khr* properties, + cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + + +/****************************************** +* cl_nv_device_attribute_query extension * +******************************************/ + +/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ +#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 +#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 +#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 +#define CL_DEVICE_WARP_SIZE_NV 0x4003 +#define CL_DEVICE_GPU_OVERLAP_NV 0x4004 +#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 +#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 + + +/********************************* +* cl_amd_device_attribute_query * +*********************************/ + +#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 +#define CL_DEVICE_TOPOLOGY_AMD 0x4037 +#define CL_DEVICE_BOARD_NAME_AMD 0x4038 +#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039 +#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040 +#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041 +#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042 +#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 +#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044 +#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045 +#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046 +#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047 +#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048 +#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049 +#define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A +#define CL_DEVICE_GFXIP_MINOR_AMD 0x404B +#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C +#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD 0x4030 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD 0x4031 +#define CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD 0x4033 +#define CL_DEVICE_PCIE_ID_AMD 0x4034 + + +/********************************* +* cl_arm_printf extension +*********************************/ + +#define CL_PRINTF_CALLBACK_ARM 0x40B0 +#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1 + + +/*********************************** +* cl_ext_device_fission extension +***********************************/ +#define cl_ext_device_fission 1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; + +typedef cl_ulong cl_device_partition_property_ext; +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateSubDevicesEXT(cl_device_id in_device, + const cl_device_partition_property_ext * properties, + cl_uint num_entries, + cl_device_id * out_devices, + cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id in_device, + const cl_device_partition_property_ext * properties, + cl_uint num_entries, + cl_device_id * out_devices, + cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1; + +/* cl_device_partition_property_ext */ +#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 +#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 +#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 + +/* clDeviceGetInfo selectors */ +#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 +#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055 +#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056 +#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057 +#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058 + +/* error codes */ +#define CL_DEVICE_PARTITION_FAILED_EXT -1057 +#define CL_INVALID_PARTITION_COUNT_EXT -1058 +#define CL_INVALID_PARTITION_NAME_EXT -1059 + +/* CL_AFFINITY_DOMAINs */ +#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1 +#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2 +#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3 +#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4 +#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10 +#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100 + +/* cl_device_partition_property_ext list terminators */ +#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0) +#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0) +#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1) + + +/*********************************** + * cl_ext_migrate_memobject extension definitions + ***********************************/ +#define cl_ext_migrate_memobject 1 + +typedef cl_bitfield cl_mem_migration_flags_ext; + +#define CL_MIGRATE_MEM_OBJECT_HOST_EXT 0x1 + +#define CL_COMMAND_MIGRATE_MEM_OBJECT_EXT 0x4040 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemObjectEXT(cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem * mem_objects, + cl_mem_migration_flags_ext flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + +typedef CL_API_ENTRY cl_int +(CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem * mem_objects, + cl_mem_migration_flags_ext flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + + +/********************************* +* cl_qcom_ext_host_ptr extension +*********************************/ +#define cl_qcom_ext_host_ptr 1 + +#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29) + +#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0 +#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1 +#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2 +#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3 +#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4 +#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5 +#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6 +#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7 + +typedef cl_uint cl_image_pitch_info_qcom; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceImageInfoQCOM(cl_device_id device, + size_t image_width, + size_t image_height, + const cl_image_format *image_format, + cl_image_pitch_info_qcom param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +typedef struct _cl_mem_ext_host_ptr +{ + /* Type of external memory allocation. */ + /* Legal values will be defined in layered extensions. */ + cl_uint allocation_type; + + /* Host cache policy for this external memory allocation. */ + cl_uint host_cache_policy; + +} cl_mem_ext_host_ptr; + + +/******************************************* +* cl_qcom_ext_host_ptr_iocoherent extension +********************************************/ + +/* Cache policy specifying io-coherence */ +#define CL_MEM_HOST_IOCOHERENT_QCOM 0x40A9 + + +/********************************* +* cl_qcom_ion_host_ptr extension +*********************************/ + +#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8 + +typedef struct _cl_mem_ion_host_ptr +{ + /* Type of external memory allocation. */ + /* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */ + cl_mem_ext_host_ptr ext_host_ptr; + + /* ION file descriptor */ + int ion_filedesc; + + /* Host pointer to the ION allocated memory */ + void* ion_hostptr; + +} cl_mem_ion_host_ptr; + + +/********************************* +* cl_qcom_android_native_buffer_host_ptr extension +*********************************/ + +#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6 + +typedef struct _cl_mem_android_native_buffer_host_ptr +{ + /* Type of external memory allocation. */ + /* Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers. */ + cl_mem_ext_host_ptr ext_host_ptr; + + /* Virtual pointer to the android native buffer */ + void* anb_ptr; + +} cl_mem_android_native_buffer_host_ptr; + + +/****************************************** + * cl_img_yuv_image extension * + ******************************************/ + +/* Image formats used in clCreateImage */ +#define CL_NV21_IMG 0x40D0 +#define CL_YV12_IMG 0x40D1 + + +/****************************************** + * cl_img_cached_allocations extension * + ******************************************/ + +/* Flag values used by clCreateBuffer */ +#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG (1 << 26) +#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG (1 << 27) + + +/****************************************** + * cl_img_use_gralloc_ptr extension * + ******************************************/ +#define cl_img_use_gralloc_ptr 1 + +/* Flag values used by clCreateBuffer */ +#define CL_MEM_USE_GRALLOC_PTR_IMG (1 << 28) + +/* To be used by clGetEventInfo: */ +#define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG 0x40D2 +#define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG 0x40D3 + +/* Error code from clEnqueueReleaseGrallocObjectsIMG */ +#define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG 0x40D4 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireGrallocObjectsIMG(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseGrallocObjectsIMG(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + + +/********************************* +* cl_khr_subgroups extension +*********************************/ +#define cl_khr_subgroups 1 + +#if !defined(CL_VERSION_2_1) +/* For OpenCL 2.1 and newer, cl_kernel_sub_group_info is declared in CL.h. + In hindsight, there should have been a khr suffix on this type for + the extension, but keeping it un-suffixed to maintain backwards + compatibility. */ +typedef cl_uint cl_kernel_sub_group_info; +#endif + +/* cl_kernel_sub_group_info */ +#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033 +#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelSubGroupInfoKHR(cl_kernel in_kernel, + cl_device_id in_device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void * input_value, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel in_kernel, + cl_device_id in_device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void * input_value, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED; + + +/********************************* +* cl_khr_mipmap_image extension +*********************************/ + +/* cl_sampler_properties */ +#define CL_SAMPLER_MIP_FILTER_MODE_KHR 0x1155 +#define CL_SAMPLER_LOD_MIN_KHR 0x1156 +#define CL_SAMPLER_LOD_MAX_KHR 0x1157 + + +/********************************* +* cl_khr_priority_hints extension +*********************************/ +/* This extension define is for backwards compatibility. + It shouldn't be required since this extension has no new functions. */ +#define cl_khr_priority_hints 1 + +typedef cl_uint cl_queue_priority_khr; + +/* cl_command_queue_properties */ +#define CL_QUEUE_PRIORITY_KHR 0x1096 + +/* cl_queue_priority_khr */ +#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0) +#define CL_QUEUE_PRIORITY_MED_KHR (1<<1) +#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2) + + +/********************************* +* cl_khr_throttle_hints extension +*********************************/ +/* This extension define is for backwards compatibility. + It shouldn't be required since this extension has no new functions. */ +#define cl_khr_throttle_hints 1 + +typedef cl_uint cl_queue_throttle_khr; + +/* cl_command_queue_properties */ +#define CL_QUEUE_THROTTLE_KHR 0x1097 + +/* cl_queue_throttle_khr */ +#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0) +#define CL_QUEUE_THROTTLE_MED_KHR (1<<1) +#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2) + + +/********************************* +* cl_khr_subgroup_named_barrier +*********************************/ +/* This extension define is for backwards compatibility. + It shouldn't be required since this extension has no new functions. */ +#define cl_khr_subgroup_named_barrier 1 + +/* cl_device_info */ +#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR 0x2035 + + +/********************************* +* cl_khr_extended_versioning +*********************************/ + +#define cl_khr_extended_versioning 1 + +#define CL_VERSION_MAJOR_BITS_KHR (10) +#define CL_VERSION_MINOR_BITS_KHR (10) +#define CL_VERSION_PATCH_BITS_KHR (12) + +#define CL_VERSION_MAJOR_MASK_KHR ((1 << CL_VERSION_MAJOR_BITS_KHR) - 1) +#define CL_VERSION_MINOR_MASK_KHR ((1 << CL_VERSION_MINOR_BITS_KHR) - 1) +#define CL_VERSION_PATCH_MASK_KHR ((1 << CL_VERSION_PATCH_BITS_KHR) - 1) + +#define CL_VERSION_MAJOR_KHR(version) ((version) >> (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) +#define CL_VERSION_MINOR_KHR(version) (((version) >> CL_VERSION_PATCH_BITS_KHR) & CL_VERSION_MINOR_MASK_KHR) +#define CL_VERSION_PATCH_KHR(version) ((version) & CL_VERSION_PATCH_MASK_KHR) + +#define CL_MAKE_VERSION_KHR(major, minor, patch) \ + ((((major) & CL_VERSION_MAJOR_MASK_KHR) << (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) | \ + (((minor) & CL_VERSION_MINOR_MASK_KHR) << CL_VERSION_PATCH_BITS_KHR) | \ + ((patch) & CL_VERSION_PATCH_MASK_KHR)) + +typedef cl_uint cl_version_khr; + +#define CL_NAME_VERSION_MAX_NAME_SIZE_KHR 64 + +typedef struct _cl_name_version_khr +{ + cl_version_khr version; + char name[CL_NAME_VERSION_MAX_NAME_SIZE_KHR]; +} cl_name_version_khr; + +/* cl_platform_info */ +#define CL_PLATFORM_NUMERIC_VERSION_KHR 0x0906 +#define CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR 0x0907 + +/* cl_device_info */ +#define CL_DEVICE_NUMERIC_VERSION_KHR 0x105E +#define CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR 0x105F +#define CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR 0x1060 +#define CL_DEVICE_ILS_WITH_VERSION_KHR 0x1061 +#define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR 0x1062 + + +/********************************* +* cl_khr_device_uuid extension +*********************************/ +#define cl_khr_device_uuid 1 + +#define CL_UUID_SIZE_KHR 16 +#define CL_LUID_SIZE_KHR 8 + +#define CL_DEVICE_UUID_KHR 0x106A +#define CL_DRIVER_UUID_KHR 0x106B +#define CL_DEVICE_LUID_VALID_KHR 0x106C +#define CL_DEVICE_LUID_KHR 0x106D +#define CL_DEVICE_NODE_MASK_KHR 0x106E + + +/********************************** + * cl_arm_import_memory extension * + **********************************/ +#define cl_arm_import_memory 1 + +typedef intptr_t cl_import_properties_arm; + +/* Default and valid proporties name for cl_arm_import_memory */ +#define CL_IMPORT_TYPE_ARM 0x40B2 + +/* Host process memory type default value for CL_IMPORT_TYPE_ARM property */ +#define CL_IMPORT_TYPE_HOST_ARM 0x40B3 + +/* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */ +#define CL_IMPORT_TYPE_DMA_BUF_ARM 0x40B4 + +/* Protected memory property */ +#define CL_IMPORT_TYPE_PROTECTED_ARM 0x40B5 + +/* Android hardware buffer type value for CL_IMPORT_TYPE_ARM property */ +#define CL_IMPORT_TYPE_ANDROID_HARDWARE_BUFFER_ARM 0x41E2 + +/* Data consistency with host property */ +#define CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM 0x41E3 + +/* Import memory size value to indicate a size for the whole buffer */ +#define CL_IMPORT_MEMORY_WHOLE_ALLOCATION_ARM SIZE_MAX + +/* This extension adds a new function that allows for direct memory import into + * OpenCL via the clImportMemoryARM function. + * + * Memory imported through this interface will be mapped into the device's page + * tables directly, providing zero copy access. It will never fall back to copy + * operations and aliased buffers. + * + * Types of memory supported for import are specified as additional extension + * strings. + * + * This extension produces cl_mem allocations which are compatible with all other + * users of cl_mem in the standard API. + * + * This extension maps pages with the same properties as the normal buffer creation + * function clCreateBuffer. + */ +extern CL_API_ENTRY cl_mem CL_API_CALL +clImportMemoryARM( cl_context context, + cl_mem_flags flags, + const cl_import_properties_arm *properties, + void *memory, + size_t size, + cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_0; + + +/****************************************** + * cl_arm_shared_virtual_memory extension * + ******************************************/ +#define cl_arm_shared_virtual_memory 1 + +/* Used by clGetDeviceInfo */ +#define CL_DEVICE_SVM_CAPABILITIES_ARM 0x40B6 + +/* Used by clGetMemObjectInfo */ +#define CL_MEM_USES_SVM_POINTER_ARM 0x40B7 + +/* Used by clSetKernelExecInfoARM: */ +#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM 0x40B8 +#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM 0x40B9 + +/* To be used by clGetEventInfo: */ +#define CL_COMMAND_SVM_FREE_ARM 0x40BA +#define CL_COMMAND_SVM_MEMCPY_ARM 0x40BB +#define CL_COMMAND_SVM_MEMFILL_ARM 0x40BC +#define CL_COMMAND_SVM_MAP_ARM 0x40BD +#define CL_COMMAND_SVM_UNMAP_ARM 0x40BE + +/* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. */ +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM (1 << 2) +#define CL_DEVICE_SVM_ATOMICS_ARM (1 << 3) + +/* Flag values used by clSVMAllocARM: */ +#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM (1 << 10) +#define CL_MEM_SVM_ATOMICS_ARM (1 << 11) + +typedef cl_bitfield cl_svm_mem_flags_arm; +typedef cl_uint cl_kernel_exec_info_arm; +typedef cl_bitfield cl_device_svm_capabilities_arm; + +extern CL_API_ENTRY void * CL_API_CALL +clSVMAllocARM(cl_context context, + cl_svm_mem_flags_arm flags, + size_t size, + cl_uint alignment) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY void CL_API_CALL +clSVMFreeARM(cl_context context, + void * svm_pointer) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMFreeARM(cl_command_queue command_queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void * user_data), + void * user_data, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemcpyARM(cl_command_queue command_queue, + cl_bool blocking_copy, + void * dst_ptr, + const void * src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemFillARM(cl_command_queue command_queue, + void * svm_ptr, + const void * pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMapARM(cl_command_queue command_queue, + cl_bool blocking_map, + cl_map_flags flags, + void * svm_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMUnmapARM(cl_command_queue command_queue, + void * svm_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArgSVMPointerARM(cl_kernel kernel, + cl_uint arg_index, + const void * arg_value) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelExecInfoARM(cl_kernel kernel, + cl_kernel_exec_info_arm param_name, + size_t param_value_size, + const void * param_value) CL_EXT_SUFFIX__VERSION_1_2; + +/******************************** + * cl_arm_get_core_id extension * + ********************************/ + +#ifdef CL_VERSION_1_2 + +#define cl_arm_get_core_id 1 + +/* Device info property for bitfield of cores present */ +#define CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM 0x40BF + +#endif /* CL_VERSION_1_2 */ + +/********************************* +* cl_arm_job_slot_selection +*********************************/ + +#define cl_arm_job_slot_selection 1 + +/* cl_device_info */ +#define CL_DEVICE_JOB_SLOTS_ARM 0x41E0 + +/* cl_command_queue_properties */ +#define CL_QUEUE_JOB_SLOT_ARM 0x41E1 + +#ifdef __cplusplus +} +#endif + + +#endif /* __CL_EXT_H */ diff --git a/CL/cl_ext_intel.h b/CL/cl_ext_intel.h new file mode 100644 index 0000000..f044684 --- /dev/null +++ b/CL/cl_ext_intel.h @@ -0,0 +1,682 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ******************************************************************************/ +/*****************************************************************************\ + +Copyright (c) 2013-2020 Intel Corporation All Rights Reserved. + +THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE +MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +File Name: cl_ext_intel.h + +Abstract: + +Notes: + +\*****************************************************************************/ + +#ifndef __CL_EXT_INTEL_H +#define __CL_EXT_INTEL_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/*************************************** +* cl_intel_thread_local_exec extension * +****************************************/ + +#define cl_intel_thread_local_exec 1 + +#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31) + +/*********************************************** +* cl_intel_device_partition_by_names extension * +************************************************/ + +#define cl_intel_device_partition_by_names 1 + +#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052 +#define CL_PARTITION_BY_NAMES_LIST_END_INTEL -1 + +/************************************************ +* cl_intel_accelerator extension * +* cl_intel_motion_estimation extension * +* cl_intel_advanced_motion_estimation extension * +*************************************************/ + +#define cl_intel_accelerator 1 +#define cl_intel_motion_estimation 1 +#define cl_intel_advanced_motion_estimation 1 + +typedef struct _cl_accelerator_intel* cl_accelerator_intel; +typedef cl_uint cl_accelerator_type_intel; +typedef cl_uint cl_accelerator_info_intel; + +typedef struct _cl_motion_estimation_desc_intel { + cl_uint mb_block_type; + cl_uint subpixel_mode; + cl_uint sad_adjust_mode; + cl_uint search_path_type; +} cl_motion_estimation_desc_intel; + +/* error codes */ +#define CL_INVALID_ACCELERATOR_INTEL -1094 +#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095 +#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096 +#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097 + +/* cl_accelerator_type_intel */ +#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0 + +/* cl_accelerator_info_intel */ +#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090 +#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091 +#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092 +#define CL_ACCELERATOR_TYPE_INTEL 0x4093 + +/* cl_motion_detect_desc_intel flags */ +#define CL_ME_MB_TYPE_16x16_INTEL 0x0 +#define CL_ME_MB_TYPE_8x8_INTEL 0x1 +#define CL_ME_MB_TYPE_4x4_INTEL 0x2 + +#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 +#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 +#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2 + +#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 +#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1 + +#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0 +#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1 +#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5 + +#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL 0x0 +#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1 +#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2 +#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL 0x4 + +#define CL_ME_FORWARD_INPUT_MODE_INTEL 0x1 +#define CL_ME_BACKWARD_INPUT_MODE_INTEL 0x2 +#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL 0x3 + +#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL 16 +#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL 21 +#define CL_ME_BIDIR_WEIGHT_HALF_INTEL 32 +#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 43 +#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 48 + +#define CL_ME_COST_PENALTY_NONE_INTEL 0x0 +#define CL_ME_COST_PENALTY_LOW_INTEL 0x1 +#define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2 +#define CL_ME_COST_PENALTY_HIGH_INTEL 0x3 + +#define CL_ME_COST_PRECISION_QPEL_INTEL 0x0 +#define CL_ME_COST_PRECISION_HPEL_INTEL 0x1 +#define CL_ME_COST_PRECISION_PEL_INTEL 0x2 +#define CL_ME_COST_PRECISION_DPEL_INTEL 0x3 + +#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 +#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 +#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 + +#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 +#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 +#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 +#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 +#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 +#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 + +#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 +#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 +#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 + +/* cl_device_info */ +#define CL_DEVICE_ME_VERSION_INTEL 0x407E + +#define CL_ME_VERSION_LEGACY_INTEL 0x0 +#define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1 +#define CL_ME_VERSION_ADVANCED_VER_2_INTEL 0x2 + +extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL +clCreateAcceleratorINTEL( + cl_context context, + cl_accelerator_type_intel accelerator_type, + size_t descriptor_size, + const void* descriptor, + cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)( + cl_context context, + cl_accelerator_type_intel accelerator_type, + size_t descriptor_size, + const void* descriptor, + cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetAcceleratorInfoINTEL( + cl_accelerator_intel accelerator, + cl_accelerator_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)( + cl_accelerator_intel accelerator, + cl_accelerator_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainAcceleratorINTEL( + cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)( + cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseAcceleratorINTEL( + cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)( + cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; + +/****************************************** +* cl_intel_simultaneous_sharing extension * +*******************************************/ + +#define cl_intel_simultaneous_sharing 1 + +#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104 +#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105 + +/*********************************** +* cl_intel_egl_image_yuv extension * +************************************/ + +#define cl_intel_egl_image_yuv 1 + +#define CL_EGL_YUV_PLANE_INTEL 0x4107 + +/******************************** +* cl_intel_packed_yuv extension * +*********************************/ + +#define cl_intel_packed_yuv 1 + +#define CL_YUYV_INTEL 0x4076 +#define CL_UYVY_INTEL 0x4077 +#define CL_YVYU_INTEL 0x4078 +#define CL_VYUY_INTEL 0x4079 + +/******************************************** +* cl_intel_required_subgroup_size extension * +*********************************************/ + +#define cl_intel_required_subgroup_size 1 + +#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108 +#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109 +#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A + +/**************************************** +* cl_intel_driver_diagnostics extension * +*****************************************/ + +#define cl_intel_driver_diagnostics 1 + +typedef cl_uint cl_diagnostics_verbose_level; + +#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106 + +#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL ( 0xff ) +#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL ( 1 ) +#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL ( 1 << 1 ) +#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL ( 1 << 2 ) + +/******************************** +* cl_intel_planar_yuv extension * +*********************************/ + +#define CL_NV12_INTEL 0x410E + +#define CL_MEM_NO_ACCESS_INTEL ( 1 << 24 ) +#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ( 1 << 25 ) + +#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E +#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F + +/******************************************************* +* cl_intel_device_side_avc_motion_estimation extension * +********************************************************/ + +#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B +#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C +#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D + +#define CL_AVC_ME_VERSION_0_INTEL 0x0 /* No support. */ +#define CL_AVC_ME_VERSION_1_INTEL 0x1 /* First supported version. */ + +#define CL_AVC_ME_MAJOR_16x16_INTEL 0x0 +#define CL_AVC_ME_MAJOR_16x8_INTEL 0x1 +#define CL_AVC_ME_MAJOR_8x16_INTEL 0x2 +#define CL_AVC_ME_MAJOR_8x8_INTEL 0x3 + +#define CL_AVC_ME_MINOR_8x8_INTEL 0x0 +#define CL_AVC_ME_MINOR_8x4_INTEL 0x1 +#define CL_AVC_ME_MINOR_4x8_INTEL 0x2 +#define CL_AVC_ME_MINOR_4x4_INTEL 0x3 + +#define CL_AVC_ME_MAJOR_FORWARD_INTEL 0x0 +#define CL_AVC_ME_MAJOR_BACKWARD_INTEL 0x1 +#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2 + +#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0 +#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E +#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D +#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B +#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77 +#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F +#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F +#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F + +#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0 +#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1 +#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2 +#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3 +#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4 +#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5 +#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6 +#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7 +#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8 +#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL 0x9 +#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL 0x2 +#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL 0xa + +#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 +#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2 + +#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 +#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 +#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3 + +#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0 +#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1 +#define CL_AVC_ME_COST_PRECISION_PEL_INTEL 0x2 +#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3 + +#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10 +#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15 +#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20 +#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B +#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30 + +#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0 +#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2 +#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4 +#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8 + +#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0 +#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000 + +#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL ( 0x1 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL ( 0x2 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL ( 0x3 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL ( 0x55 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL ( 0xAA << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL ( 0xFF << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL ( 0x1 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL ( 0x2 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL ( 0x1 << 26 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL ( 0x2 << 26 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL ( 0x1 << 28 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL ( 0x2 << 28 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL ( 0x1 << 30 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL ( 0x2 << 30 ) + +#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00 +#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80 + +#define CL_AVC_ME_INTRA_16x16_INTEL 0x0 +#define CL_AVC_ME_INTRA_8x8_INTEL 0x1 +#define CL_AVC_ME_INTRA_4x4_INTEL 0x2 + +#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6 +#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5 +#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3 + +#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60 +#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10 +#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8 +#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4 + +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 +#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 +#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 +#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 + +#define CL_AVC_ME_FRAME_FORWARD_INTEL 0x1 +#define CL_AVC_ME_FRAME_BACKWARD_INTEL 0x2 +#define CL_AVC_ME_FRAME_DUAL_INTEL 0x3 + +#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0 +#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1 +#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2 + +#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0 +#define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1 + +/******************************************* +* cl_intel_unified_shared_memory extension * +********************************************/ + +/* These APIs are in sync with Revision O of the cl_intel_unified_shared_memory spec! */ + +#define cl_intel_unified_shared_memory 1 + +/* cl_device_info */ +#define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL 0x4190 +#define CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL 0x4191 +#define CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4192 +#define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4193 +#define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL 0x4194 + +typedef cl_bitfield cl_device_unified_shared_memory_capabilities_intel; + +/* cl_device_unified_shared_memory_capabilities_intel - bitfield */ +#define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL (1 << 0) +#define CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL (1 << 1) +#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL (1 << 2) +#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL (1 << 3) + +typedef cl_bitfield cl_mem_properties_intel; + +/* cl_mem_properties_intel */ +#define CL_MEM_ALLOC_FLAGS_INTEL 0x4195 + +typedef cl_bitfield cl_mem_alloc_flags_intel; + +/* cl_mem_alloc_flags_intel - bitfield */ +#define CL_MEM_ALLOC_WRITE_COMBINED_INTEL (1 << 0) + +typedef cl_uint cl_mem_info_intel; + +/* cl_mem_alloc_info_intel */ +#define CL_MEM_ALLOC_TYPE_INTEL 0x419A +#define CL_MEM_ALLOC_BASE_PTR_INTEL 0x419B +#define CL_MEM_ALLOC_SIZE_INTEL 0x419C +#define CL_MEM_ALLOC_DEVICE_INTEL 0x419D +/* Enum values 0x419E-0x419F are reserved for future queries. */ + +typedef cl_uint cl_unified_shared_memory_type_intel; + +/* cl_unified_shared_memory_type_intel */ +#define CL_MEM_TYPE_UNKNOWN_INTEL 0x4196 +#define CL_MEM_TYPE_HOST_INTEL 0x4197 +#define CL_MEM_TYPE_DEVICE_INTEL 0x4198 +#define CL_MEM_TYPE_SHARED_INTEL 0x4199 + +typedef cl_uint cl_mem_advice_intel; + +/* cl_mem_advice_intel */ +/* Enum values 0x4208-0x420F are reserved for future memory advices. */ + +/* cl_kernel_exec_info */ +#define CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL 0x4200 +#define CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL 0x4201 +#define CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL 0x4202 +#define CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL 0x4203 + +/* cl_command_type */ +#define CL_COMMAND_MEMFILL_INTEL 0x4204 +#define CL_COMMAND_MEMCPY_INTEL 0x4205 +#define CL_COMMAND_MIGRATEMEM_INTEL 0x4206 +#define CL_COMMAND_MEMADVISE_INTEL 0x4207 + +extern CL_API_ENTRY void* CL_API_CALL +clHostMemAllocINTEL( + cl_context context, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret); + +typedef CL_API_ENTRY void* (CL_API_CALL * +clHostMemAllocINTEL_fn)( + cl_context context, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret); + +extern CL_API_ENTRY void* CL_API_CALL +clDeviceMemAllocINTEL( + cl_context context, + cl_device_id device, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret); + +typedef CL_API_ENTRY void* (CL_API_CALL * +clDeviceMemAllocINTEL_fn)( + cl_context context, + cl_device_id device, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret); + +extern CL_API_ENTRY void* CL_API_CALL +clSharedMemAllocINTEL( + cl_context context, + cl_device_id device, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret); + +typedef CL_API_ENTRY void* (CL_API_CALL * +clSharedMemAllocINTEL_fn)( + cl_context context, + cl_device_id device, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL +clMemFreeINTEL( + cl_context context, + void* ptr); + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +clMemFreeINTEL_fn)( + cl_context context, + void* ptr); + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMemAllocInfoINTEL( + cl_context context, + const void* ptr, + cl_mem_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +clGetMemAllocInfoINTEL_fn)( + cl_context context, + const void* ptr, + cl_mem_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArgMemPointerINTEL( + cl_kernel kernel, + cl_uint arg_index, + const void* arg_value); + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +clSetKernelArgMemPointerINTEL_fn)( + cl_kernel kernel, + cl_uint arg_index, + const void* arg_value); + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMemsetINTEL( /* Deprecated */ + cl_command_queue command_queue, + void* dst_ptr, + cl_int value, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +clEnqueueMemsetINTEL_fn)( /* Deprecated */ + cl_command_queue command_queue, + void* dst_ptr, + cl_int value, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMemFillINTEL( + cl_command_queue command_queue, + void* dst_ptr, + const void* pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +clEnqueueMemFillINTEL_fn)( + cl_command_queue command_queue, + void* dst_ptr, + const void* pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMemcpyINTEL( + cl_command_queue command_queue, + cl_bool blocking, + void* dst_ptr, + const void* src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +clEnqueueMemcpyINTEL_fn)( + cl_command_queue command_queue, + cl_bool blocking, + void* dst_ptr, + const void* src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +#ifdef CL_VERSION_1_2 + +/* Because these APIs use cl_mem_migration_flags, they require + OpenCL 1.2: */ + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemINTEL( + cl_command_queue command_queue, + const void* ptr, + size_t size, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +clEnqueueMigrateMemINTEL_fn)( + cl_command_queue command_queue, + const void* ptr, + size_t size, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMemAdviseINTEL( + cl_command_queue command_queue, + const void* ptr, + size_t size, + cl_mem_advice_intel advice, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +typedef CL_API_ENTRY cl_int (CL_API_CALL * +clEnqueueMemAdviseINTEL_fn)( + cl_command_queue command_queue, + const void* ptr, + size_t size, + cl_mem_advice_intel advice, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event); + +#ifdef __cplusplus +} +#endif + +#endif /* __CL_EXT_INTEL_H */ diff --git a/CL/cl_gl.h b/CL/cl_gl.h new file mode 100644 index 0000000..b587f02 --- /dev/null +++ b/CL/cl_gl.h @@ -0,0 +1,159 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __OPENCL_CL_GL_H +#define __OPENCL_CL_GL_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef cl_uint cl_gl_object_type; +typedef cl_uint cl_gl_texture_info; +typedef cl_uint cl_gl_platform_info; +typedef struct __GLsync *cl_GLsync; + +/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */ +#define CL_GL_OBJECT_BUFFER 0x2000 +#define CL_GL_OBJECT_TEXTURE2D 0x2001 +#define CL_GL_OBJECT_TEXTURE3D 0x2002 +#define CL_GL_OBJECT_RENDERBUFFER 0x2003 +#ifdef CL_VERSION_1_2 +#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E +#define CL_GL_OBJECT_TEXTURE1D 0x200F +#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 +#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 +#endif + +/* cl_gl_texture_info */ +#define CL_GL_TEXTURE_TARGET 0x2004 +#define CL_GL_MIPMAP_LEVEL 0x2005 +#ifdef CL_VERSION_1_2 +#define CL_GL_NUM_SAMPLES 0x2012 +#endif + + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLBuffer(cl_context context, + cl_mem_flags flags, + cl_GLuint bufobj, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLTexture(cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLRenderbuffer(cl_context context, + cl_mem_flags flags, + cl_GLuint renderbuffer, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLObjectInfo(cl_mem memobj, + cl_gl_object_type * gl_object_type, + cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLTextureInfo(cl_mem memobj, + cl_gl_texture_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireGLObjects(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseGLObjects(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + + +/* Deprecated OpenCL 1.1 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateFromGLTexture2D(cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateFromGLTexture3D(cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +/* cl_khr_gl_sharing extension */ + +#define cl_khr_gl_sharing 1 + +typedef cl_uint cl_gl_context_info; + +/* Additional Error Codes */ +#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 + +/* cl_gl_context_info */ +#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 +#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 + +/* Additional cl_context_properties */ +#define CL_GL_CONTEXT_KHR 0x2008 +#define CL_EGL_DISPLAY_KHR 0x2009 +#define CL_GLX_DISPLAY_KHR 0x200A +#define CL_WGL_HDC_KHR 0x200B +#define CL_CGL_SHAREGROUP_KHR 0x200C + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLContextInfoKHR(const cl_context_properties * properties, + cl_gl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( + const cl_context_properties * properties, + cl_gl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret); + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_GL_H */ diff --git a/CL/cl_gl_ext.h b/CL/cl_gl_ext.h new file mode 100644 index 0000000..9bb7540 --- /dev/null +++ b/CL/cl_gl_ext.h @@ -0,0 +1,40 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __OPENCL_CL_GL_EXT_H +#define __OPENCL_CL_GL_EXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* + * cl_khr_gl_event extension + */ +#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateEventFromGLsyncKHR(cl_context context, + cl_GLsync cl_GLsync, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_GL_EXT_H */ diff --git a/CL/cl_half.h b/CL/cl_half.h new file mode 100644 index 0000000..f748d9e --- /dev/null +++ b/CL/cl_half.h @@ -0,0 +1,440 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +/** + * This is a header-only utility library that provides OpenCL host code with + * routines for converting to/from cl_half values. + * + * Example usage: + * + * #include + * ... + * cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE); + * cl_float f = cl_half_to_float(h); + */ + +#ifndef OPENCL_CL_HALF_H +#define OPENCL_CL_HALF_H + +#include + +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Rounding mode used when converting to cl_half. + */ +typedef enum +{ + CL_HALF_RTE, // round to nearest even + CL_HALF_RTZ, // round towards zero + CL_HALF_RTP, // round towards positive infinity + CL_HALF_RTN, // round towards negative infinity +} cl_half_rounding_mode; + + +/* Private utility macros. */ +#define CL_HALF_EXP_MASK 0x7C00 +#define CL_HALF_MAX_FINITE_MAG 0x7BFF + + +/* + * Utility to deal with values that overflow when converting to half precision. + */ +static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode, + uint16_t sign) +{ + if (rounding_mode == CL_HALF_RTZ) + { + // Round overflow towards zero -> largest finite number (preserving sign) + return (sign << 15) | CL_HALF_MAX_FINITE_MAG; + } + else if (rounding_mode == CL_HALF_RTP && sign) + { + // Round negative overflow towards positive infinity -> most negative finite number + return (1 << 15) | CL_HALF_MAX_FINITE_MAG; + } + else if (rounding_mode == CL_HALF_RTN && !sign) + { + // Round positive overflow towards negative infinity -> largest finite number + return CL_HALF_MAX_FINITE_MAG; + } + + // Overflow to infinity + return (sign << 15) | CL_HALF_EXP_MASK; +} + +/* + * Utility to deal with values that underflow when converting to half precision. + */ +static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode, + uint16_t sign) +{ + if (rounding_mode == CL_HALF_RTP && !sign) + { + // Round underflow towards positive infinity -> smallest positive value + return (sign << 15) | 1; + } + else if (rounding_mode == CL_HALF_RTN && sign) + { + // Round underflow towards negative infinity -> largest negative value + return (sign << 15) | 1; + } + + // Flush to zero + return (sign << 15); +} + + +/** + * Convert a cl_float to a cl_half. + */ +static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode) +{ + // Type-punning to get direct access to underlying bits + union + { + cl_float f; + uint32_t i; + } f32; + f32.f = f; + + // Extract sign bit + uint16_t sign = f32.i >> 31; + + // Extract FP32 exponent and mantissa + uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF; + uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1); + + // Remove FP32 exponent bias + int32_t exp = f_exp - CL_FLT_MAX_EXP + 1; + + // Add FP16 exponent bias + uint16_t h_exp = exp + CL_HALF_MAX_EXP - 1; + + // Position of the bit that will become the FP16 mantissa LSB + uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG; + + // Check for NaN / infinity + if (f_exp == 0xFF) + { + if (f_mant) + { + // NaN -> propagate mantissa and silence it + uint16_t h_mant = f_mant >> lsb_pos; + h_mant |= 0x200; + return (sign << 15) | CL_HALF_EXP_MASK | h_mant; + } + else + { + // Infinity -> zero mantissa + return (sign << 15) | CL_HALF_EXP_MASK; + } + } + + // Check for zero + if (!f_exp && !f_mant) + { + return (sign << 15); + } + + // Check for overflow + if (exp >= CL_HALF_MAX_EXP) + { + return cl_half_handle_overflow(rounding_mode, sign); + } + + // Check for underflow + if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1)) + { + return cl_half_handle_underflow(rounding_mode, sign); + } + + // Check for value that will become denormal + if (exp < -14) + { + // Denormal -> include the implicit 1 from the FP32 mantissa + h_exp = 0; + f_mant |= 1 << (CL_FLT_MANT_DIG - 1); + + // Mantissa shift amount depends on exponent + lsb_pos = -exp + (CL_FLT_MANT_DIG - 25); + } + + // Generate FP16 mantissa by shifting FP32 mantissa + uint16_t h_mant = f_mant >> lsb_pos; + + // Check whether we need to round + uint32_t halfway = 1 << (lsb_pos - 1); + uint32_t mask = (halfway << 1) - 1; + switch (rounding_mode) + { + case CL_HALF_RTE: + if ((f_mant & mask) > halfway) + { + // More than halfway -> round up + h_mant += 1; + } + else if ((f_mant & mask) == halfway) + { + // Exactly halfway -> round to nearest even + if (h_mant & 0x1) + h_mant += 1; + } + break; + case CL_HALF_RTZ: + // Mantissa has already been truncated -> do nothing + break; + case CL_HALF_RTP: + if ((f_mant & mask) && !sign) + { + // Round positive numbers up + h_mant += 1; + } + break; + case CL_HALF_RTN: + if ((f_mant & mask) && sign) + { + // Round negative numbers down + h_mant += 1; + } + break; + } + + // Check for mantissa overflow + if (h_mant & 0x400) + { + h_exp += 1; + h_mant = 0; + } + + return (sign << 15) | (h_exp << 10) | h_mant; +} + + +/** + * Convert a cl_double to a cl_half. + */ +static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode) +{ + // Type-punning to get direct access to underlying bits + union + { + cl_double d; + uint64_t i; + } f64; + f64.d = d; + + // Extract sign bit + uint16_t sign = f64.i >> 63; + + // Extract FP64 exponent and mantissa + uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF; + uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1); + + // Remove FP64 exponent bias + int64_t exp = d_exp - CL_DBL_MAX_EXP + 1; + + // Add FP16 exponent bias + uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1); + + // Position of the bit that will become the FP16 mantissa LSB + uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG; + + // Check for NaN / infinity + if (d_exp == 0x7FF) + { + if (d_mant) + { + // NaN -> propagate mantissa and silence it + uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos); + h_mant |= 0x200; + return (sign << 15) | CL_HALF_EXP_MASK | h_mant; + } + else + { + // Infinity -> zero mantissa + return (sign << 15) | CL_HALF_EXP_MASK; + } + } + + // Check for zero + if (!d_exp && !d_mant) + { + return (sign << 15); + } + + // Check for overflow + if (exp >= CL_HALF_MAX_EXP) + { + return cl_half_handle_overflow(rounding_mode, sign); + } + + // Check for underflow + if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1)) + { + return cl_half_handle_underflow(rounding_mode, sign); + } + + // Check for value that will become denormal + if (exp < -14) + { + // Include the implicit 1 from the FP64 mantissa + h_exp = 0; + d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1); + + // Mantissa shift amount depends on exponent + lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25)); + } + + // Generate FP16 mantissa by shifting FP64 mantissa + uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos); + + // Check whether we need to round + uint64_t halfway = (uint64_t)1 << (lsb_pos - 1); + uint64_t mask = (halfway << 1) - 1; + switch (rounding_mode) + { + case CL_HALF_RTE: + if ((d_mant & mask) > halfway) + { + // More than halfway -> round up + h_mant += 1; + } + else if ((d_mant & mask) == halfway) + { + // Exactly halfway -> round to nearest even + if (h_mant & 0x1) + h_mant += 1; + } + break; + case CL_HALF_RTZ: + // Mantissa has already been truncated -> do nothing + break; + case CL_HALF_RTP: + if ((d_mant & mask) && !sign) + { + // Round positive numbers up + h_mant += 1; + } + break; + case CL_HALF_RTN: + if ((d_mant & mask) && sign) + { + // Round negative numbers down + h_mant += 1; + } + break; + } + + // Check for mantissa overflow + if (h_mant & 0x400) + { + h_exp += 1; + h_mant = 0; + } + + return (sign << 15) | (h_exp << 10) | h_mant; +} + + +/** + * Convert a cl_half to a cl_float. + */ +static inline cl_float cl_half_to_float(cl_half h) +{ + // Type-punning to get direct access to underlying bits + union + { + cl_float f; + uint32_t i; + } f32; + + // Extract sign bit + uint16_t sign = h >> 15; + + // Extract FP16 exponent and mantissa + uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F; + uint16_t h_mant = h & 0x3FF; + + // Remove FP16 exponent bias + int32_t exp = h_exp - CL_HALF_MAX_EXP + 1; + + // Add FP32 exponent bias + uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1; + + // Check for NaN / infinity + if (h_exp == 0x1F) + { + if (h_mant) + { + // NaN -> propagate mantissa and silence it + uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG); + f_mant |= 0x400000; + f32.i = (sign << 31) | 0x7F800000 | f_mant; + return f32.f; + } + else + { + // Infinity -> zero mantissa + f32.i = (sign << 31) | 0x7F800000; + return f32.f; + } + } + + // Check for zero / denormal + if (h_exp == 0) + { + if (h_mant == 0) + { + // Zero -> zero exponent + f_exp = 0; + } + else + { + // Denormal -> normalize it + // - Shift mantissa to make most-significant 1 implicit + // - Adjust exponent accordingly + uint32_t shift = 0; + while ((h_mant & 0x400) == 0) + { + h_mant <<= 1; + shift++; + } + h_mant &= 0x3FF; + f_exp -= shift - 1; + } + } + + f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13); + return f32.f; +} + + +#undef CL_HALF_EXP_MASK +#undef CL_HALF_MAX_FINITE_MAG + + +#ifdef __cplusplus +} +#endif + + +#endif /* OPENCL_CL_HALF_H */ diff --git a/CL/cl_icd.h b/CL/cl_icd.h new file mode 100644 index 0000000..8c74724 --- /dev/null +++ b/CL/cl_icd.h @@ -0,0 +1,1287 @@ +/******************************************************************************* + * Copyright (c) 2019-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef OPENCL_CL_ICD_H +#define OPENCL_CL_ICD_H + +#include +#include +#include +#include + +#if defined(_WIN32) +#include +#include +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This file contains pointer type definitions for each of the CL API calls as + * well as a type definition for the dispatch table used by the Khronos ICD + * loader (see cl_khr_icd extension specification for background). + */ + +/* API function pointer definitions */ + +// Platform APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPlatformIDs)( + cl_uint num_entries, cl_platform_id *platforms, + cl_uint *num_platforms) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPlatformInfo)( + cl_platform_id platform, cl_platform_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Device APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDs)( + cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, + cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceInfo)( + cl_device_id device, cl_device_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateSubDevices)( + cl_device_id in_device, + const cl_device_partition_property *partition_properties, + cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainDevice)( + cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseDevice)( + cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clCreateSubDevices; +typedef void *cl_api_clRetainDevice; +typedef void *cl_api_clReleaseDevice; + +#endif + +// Context APIs +typedef CL_API_ENTRY cl_context(CL_API_CALL *cl_api_clCreateContext)( + const cl_context_properties *properties, cl_uint num_devices, + const cl_device_id *devices, + void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), + void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_context(CL_API_CALL *cl_api_clCreateContextFromType)( + const cl_context_properties *properties, cl_device_type device_type, + void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), + void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainContext)( + cl_context context) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseContext)( + cl_context context) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetContextInfo)( + cl_context context, cl_context_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Command Queue APIs +typedef CL_API_ENTRY cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueue)( + cl_context context, cl_device_id device, + cl_command_queue_properties properties, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +typedef CL_API_ENTRY +cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueueWithProperties)( + cl_context /* context */, cl_device_id /* device */, + const cl_queue_properties * /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clCreateCommandQueueWithProperties; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainCommandQueue)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseCommandQueue)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetCommandQueueInfo)( + cl_command_queue command_queue, cl_command_queue_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Memory Object APIs +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateBuffer)( + cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage)( + cl_context context, cl_mem_flags flags, const cl_image_format *image_format, + const cl_image_desc *image_desc, void *host_ptr, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clCreateImage; + +#endif + +#ifdef CL_VERSION_3_0 + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateBufferWithProperties)( + cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, + size_t size, void *host_ptr, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_3_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImageWithProperties)( + cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, + const cl_image_format *image_format, const cl_image_desc *image_desc, + void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_3_0; + +#else + +typedef void *cl_api_clCreateBufferWithProperties; +typedef void *cl_api_clCreateImageWithProperties; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainMemObject)( + cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseMemObject)( + cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetSupportedImageFormats)( + cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, + cl_uint num_entries, cl_image_format *image_formats, + cl_uint *num_image_formats) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetMemObjectInfo)( + cl_mem memobj, cl_mem_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetImageInfo)( + cl_mem image, cl_image_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreatePipe)( + cl_context /* context */, cl_mem_flags /* flags */, + cl_uint /* pipe_packet_size */, cl_uint /* pipe_max_packets */, + const cl_pipe_properties * /* properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPipeInfo)( + cl_mem /* pipe */, cl_pipe_info /* param_name */, + size_t /* param_value_size */, void * /* param_value */, + size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clSVMAlloc)( + cl_context /* context */, cl_svm_mem_flags /* flags */, size_t /* size */, + unsigned int /* alignment */)CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY void(CL_API_CALL *cl_api_clSVMFree)( + cl_context /* context */, + void * /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clCreatePipe; +typedef void *cl_api_clGetPipeInfo; +typedef void *cl_api_clSVMAlloc; +typedef void *cl_api_clSVMFree; + +#endif + +// Sampler APIs +typedef CL_API_ENTRY cl_sampler(CL_API_CALL *cl_api_clCreateSampler)( + cl_context context, cl_bool normalized_coords, + cl_addressing_mode addressing_mode, cl_filter_mode filter_mode, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainSampler)( + cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseSampler)( + cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetSamplerInfo)( + cl_sampler sampler, cl_sampler_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +typedef CL_API_ENTRY +cl_sampler(CL_API_CALL *cl_api_clCreateSamplerWithProperties)( + cl_context /* context */, + const cl_sampler_properties * /* sampler_properties */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clCreateSamplerWithProperties; + +#endif + +// Program Object APIs +typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithSource)( + cl_context context, cl_uint count, const char **strings, + const size_t *lengths, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithBinary)( + cl_context context, cl_uint num_devices, const cl_device_id *device_list, + const size_t *lengths, const unsigned char **binaries, + cl_int *binary_status, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY +cl_program(CL_API_CALL *cl_api_clCreateProgramWithBuiltInKernels)( + cl_context context, cl_uint num_devices, const cl_device_id *device_list, + const char *kernel_names, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clCreateProgramWithBuiltInKernels; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainProgram)( + cl_program program) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseProgram)( + cl_program program) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clBuildProgram)( + cl_program program, cl_uint num_devices, const cl_device_id *device_list, + const char *options, + void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), + void *user_data) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCompileProgram)( + cl_program program, cl_uint num_devices, const cl_device_id *device_list, + const char *options, cl_uint num_input_headers, + const cl_program *input_headers, const char **header_include_names, + void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), + void *user_data) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clLinkProgram)( + cl_context context, cl_uint num_devices, const cl_device_id *device_list, + const char *options, cl_uint num_input_programs, + const cl_program *input_programs, + void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), + void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clCompileProgram; +typedef void *cl_api_clLinkProgram; + +#endif + +#ifdef CL_VERSION_2_2 + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clSetProgramSpecializationConstant)( + cl_program program, cl_uint spec_id, size_t spec_size, + const void *spec_value) CL_API_SUFFIX__VERSION_2_2; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetProgramReleaseCallback)( + cl_program program, + void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), + void *user_data) CL_API_SUFFIX__VERSION_2_2; + +#else + +typedef void *cl_api_clSetProgramSpecializationConstant; +typedef void *cl_api_clSetProgramReleaseCallback; + +#endif + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clUnloadPlatformCompiler)( + cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clUnloadPlatformCompiler; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetProgramInfo)( + cl_program program, cl_program_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetProgramBuildInfo)( + cl_program program, cl_device_id device, cl_program_build_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Kernel Object APIs +typedef CL_API_ENTRY cl_kernel(CL_API_CALL *cl_api_clCreateKernel)( + cl_program program, const char *kernel_name, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateKernelsInProgram)( + cl_program program, cl_uint num_kernels, cl_kernel *kernels, + cl_uint *num_kernels_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainKernel)( + cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseKernel)( + cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelArg)( + cl_kernel kernel, cl_uint arg_index, size_t arg_size, + const void *arg_value) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelInfo)( + cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelArgInfo)( + cl_kernel kernel, cl_uint arg_indx, cl_kernel_arg_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clGetKernelArgInfo; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelWorkGroupInfo)( + cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelArgSVMPointer)( + cl_kernel /* kernel */, cl_uint /* arg_index */, + const void * /* arg_value */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelExecInfo)( + cl_kernel /* kernel */, cl_kernel_exec_info /* param_name */, + size_t /* param_value_size */, + const void * /* param_value */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfoKHR)( + cl_kernel /* in_kernel */, cl_device_id /*in_device*/, + cl_kernel_sub_group_info /* param_name */, size_t /*input_value_size*/, + const void * /*input_value*/, size_t /*param_value_size*/, + void * /*param_value*/, + size_t * /*param_value_size_ret*/) CL_EXT_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clSetKernelArgSVMPointer; +typedef void *cl_api_clSetKernelExecInfo; +typedef void *cl_api_clGetKernelSubGroupInfoKHR; + +#endif + +// Event Object APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clWaitForEvents)( + cl_uint num_events, const cl_event *event_list) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetEventInfo)( + cl_event event, cl_event_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainEvent)(cl_event event) + CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseEvent)(cl_event event) + CL_API_SUFFIX__VERSION_1_0; + +// Profiling APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetEventProfilingInfo)( + cl_event event, cl_profiling_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +// Flush and Finish APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clFlush)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clFinish)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +// Enqueued Commands APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBuffer)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, + size_t offset, size_t cb, void *ptr, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBufferRect)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, + const size_t *buffer_origin, const size_t *host_origin, + const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, + size_t host_row_pitch, size_t host_slice_pitch, void *ptr, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_1; + +#else + +typedef void *cl_api_clEnqueueReadBufferRect; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBuffer)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, + size_t offset, size_t cb, const void *ptr, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBufferRect)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, + const size_t *buffer_origin, const size_t *host_origin, + const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, + size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_1; + +#else + +typedef void *cl_api_clEnqueueWriteBufferRect; + +#endif + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueFillBuffer)( + cl_command_queue command_queue, cl_mem buffer, const void *pattern, + size_t pattern_size, size_t offset, size_t cb, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clEnqueueFillBuffer; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBuffer)( + cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, + size_t src_offset, size_t dst_offset, size_t cb, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferRect)( + cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, + const size_t *src_origin, const size_t *dst_origin, const size_t *region, + size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, + size_t dst_slice_pitch, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_1; + +#else + +typedef void *cl_api_clEnqueueCopyBufferRect; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadImage)( + cl_command_queue command_queue, cl_mem image, cl_bool blocking_read, + const size_t *origin, const size_t *region, size_t row_pitch, + size_t slice_pitch, void *ptr, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteImage)( + cl_command_queue command_queue, cl_mem image, cl_bool blocking_write, + const size_t *origin, const size_t *region, size_t input_row_pitch, + size_t input_slice_pitch, const void *ptr, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueFillImage)( + cl_command_queue command_queue, cl_mem image, const void *fill_color, + const size_t origin[3], const size_t region[3], + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clEnqueueFillImage; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyImage)( + cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image, + const size_t *src_origin, const size_t *dst_origin, const size_t *region, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyImageToBuffer)( + cl_command_queue command_queue, cl_mem src_image, cl_mem dst_buffer, + const size_t *src_origin, const size_t *region, size_t dst_offset, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferToImage)( + cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_image, + size_t src_offset, const size_t *dst_origin, const size_t *region, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clEnqueueMapBuffer)( + cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, + cl_map_flags map_flags, size_t offset, size_t cb, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clEnqueueMapImage)( + cl_command_queue command_queue, cl_mem image, cl_bool blocking_map, + cl_map_flags map_flags, const size_t *origin, const size_t *region, + size_t *image_row_pitch, size_t *image_slice_pitch, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueUnmapMemObject)( + cl_command_queue command_queue, cl_mem memobj, void *mapped_ptr, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMigrateMemObjects)( + cl_command_queue command_queue, cl_uint num_mem_objects, + const cl_mem *mem_objects, cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clEnqueueMigrateMemObjects; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueNDRangeKernel)( + cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, + const size_t *global_work_offset, const size_t *global_work_size, + const size_t *local_work_size, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueTask)( + cl_command_queue command_queue, cl_kernel kernel, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueNativeKernel)( + cl_command_queue command_queue, void(CL_CALLBACK *user_func)(void *), + void *args, size_t cb_args, cl_uint num_mem_objects, const cl_mem *mem_list, + const void **args_mem_loc, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMarkerWithWaitList)( + cl_command_queue command_queue, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueBarrierWithWaitList)( + cl_command_queue command_queue, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY void *( + CL_API_CALL *cl_api_clGetExtensionFunctionAddressForPlatform)( + cl_platform_id platform, + const char *function_name)CL_API_SUFFIX__VERSION_1_2; + +#else + +typedef void *cl_api_clEnqueueMarkerWithWaitList; +typedef void *cl_api_clEnqueueBarrierWithWaitList; +typedef void *cl_api_clGetExtensionFunctionAddressForPlatform; + +#endif + +// Shared Virtual Memory APIs + +#ifdef CL_VERSION_2_0 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMFree)( + cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */, + void ** /* svm_pointers */, + void(CL_CALLBACK *pfn_free_func)(cl_command_queue /* queue */, + cl_uint /* num_svm_pointers */, + void ** /* svm_pointers[] */, + void * /* user_data */), + void * /* user_data */, cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemcpy)( + cl_command_queue /* command_queue */, cl_bool /* blocking_copy */, + void * /* dst_ptr */, const void * /* src_ptr */, size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemFill)( + cl_command_queue /* command_queue */, void * /* svm_ptr */, + const void * /* pattern */, size_t /* pattern_size */, size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMap)( + cl_command_queue /* command_queue */, cl_bool /* blocking_map */, + cl_map_flags /* map_flags */, void * /* svm_ptr */, size_t /* size */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMUnmap)( + cl_command_queue /* command_queue */, void * /* svm_ptr */, + cl_uint /* num_events_in_wait_list */, + const cl_event * /* event_wait_list */, + cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; + +#else + +typedef void *cl_api_clEnqueueSVMFree; +typedef void *cl_api_clEnqueueSVMMemcpy; +typedef void *cl_api_clEnqueueSVMMemFill; +typedef void *cl_api_clEnqueueSVMMap; +typedef void *cl_api_clEnqueueSVMUnmap; + +#endif + +// Deprecated APIs +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetCommandQueueProperty)( + cl_command_queue command_queue, cl_command_queue_properties properties, + cl_bool enable, cl_command_queue_properties *old_properties) + CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage2D)( + cl_context context, cl_mem_flags flags, const cl_image_format *image_format, + size_t image_width, size_t image_height, size_t image_row_pitch, + void *host_ptr, cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage3D)( + cl_context context, cl_mem_flags flags, const cl_image_format *image_format, + size_t image_width, size_t image_height, size_t image_depth, + size_t image_row_pitch, size_t image_slice_pitch, void *host_ptr, + cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clUnloadCompiler)(void) + CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMarker)( + cl_command_queue command_queue, + cl_event *event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWaitForEvents)( + cl_command_queue command_queue, cl_uint num_events, + const cl_event *event_list) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueBarrier)( + cl_command_queue command_queue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clGetExtensionFunctionAddress)( + const char *function_name)CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +// GL and other APIs +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLBuffer)( + cl_context context, cl_mem_flags flags, cl_GLuint bufobj, + int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture)( + cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, + cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture2D)( + cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, + cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture3D)( + cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, + cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLRenderbuffer)( + cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLObjectInfo)( + cl_mem memobj, cl_gl_object_type *gl_object_type, + cl_GLuint *gl_object_name) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLTextureInfo)( + cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueAcquireGLObjects)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseGLObjects)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +/* cl_khr_gl_sharing */ +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLContextInfoKHR)( + const cl_context_properties *properties, cl_gl_context_info param_name, + size_t param_value_size, void *param_value, size_t *param_value_size_ret); + +/* cl_khr_gl_event */ +typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateEventFromGLsyncKHR)( + cl_context context, cl_GLsync sync, cl_int *errcode_ret); + +#if defined(_WIN32) + +/* cl_khr_d3d10_sharing */ + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D10KHR)( + cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, + void *d3d_object, cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, cl_device_id *devices, + cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D10Buffer *resource, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture2DKHR)( + cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, + UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture3DKHR)( + cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, + UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueAcquireD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D10KHR( + cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, + void *d3d_object, cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices); + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromD3D10BufferKHR(cl_context context, cl_mem_flags flags, + ID3D10Buffer *resource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture2DKHR( + cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, + UINT subresource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture3DKHR( + cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, + UINT subresource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D10ObjectsKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +/* cl_khr_d3d11_sharing */ +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D11KHR)( + cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, + void *d3d_object, cl_d3d11_device_set_khr d3d_device_set, + cl_uint num_entries, cl_device_id *devices, + cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D11Buffer *resource, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture2DKHR)( + cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, + UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture3DKHR)( + cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, + UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueAcquireD3D11ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D11ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +/* cl_khr_dx9_media_sharing */ +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR)( + cl_platform_id platform, cl_uint num_media_adapters, + cl_dx9_media_adapter_type_khr *media_adapters_type, void *media_adapters, + cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, + cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromDX9MediaSurfaceKHR)( + cl_context context, cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, void *surface_info, + cl_uint plane, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueAcquireDX9MediaSurfacesKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clEnqueueReleaseDX9MediaSurfacesKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_1_2; + +/* cl_khr_d3d11_sharing */ +extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D11KHR( + cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, + void *d3d_object, cl_d3d11_device_set_khr d3d_device_set, + cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices); + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromD3D11BufferKHR(cl_context context, cl_mem_flags flags, + ID3D11Buffer *resource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture2DKHR( + cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, + UINT subresource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture3DKHR( + cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, + UINT subresource, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D11ObjectsKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +/* cl_khr_dx9_media_sharing */ +extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromDX9MediaAdapterKHR( + cl_platform_id platform, cl_uint num_media_adapters, + cl_dx9_media_adapter_type_khr *media_adapter_type, void *media_adapters, + cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, + cl_device_id *devices, cl_uint *num_devices); + +extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceKHR( + cl_context context, cl_mem_flags flags, + cl_dx9_media_adapter_type_khr adapter_type, void *surface_info, + cl_uint plane, cl_int *errcode_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireDX9MediaSurfacesKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +#else + +/* cl_khr_d3d10_sharing */ +typedef void *cl_api_clGetDeviceIDsFromD3D10KHR; +typedef void *cl_api_clCreateFromD3D10BufferKHR; +typedef void *cl_api_clCreateFromD3D10Texture2DKHR; +typedef void *cl_api_clCreateFromD3D10Texture3DKHR; +typedef void *cl_api_clEnqueueAcquireD3D10ObjectsKHR; +typedef void *cl_api_clEnqueueReleaseD3D10ObjectsKHR; + +/* cl_khr_d3d11_sharing */ +typedef void *cl_api_clGetDeviceIDsFromD3D11KHR; +typedef void *cl_api_clCreateFromD3D11BufferKHR; +typedef void *cl_api_clCreateFromD3D11Texture2DKHR; +typedef void *cl_api_clCreateFromD3D11Texture3DKHR; +typedef void *cl_api_clEnqueueAcquireD3D11ObjectsKHR; +typedef void *cl_api_clEnqueueReleaseD3D11ObjectsKHR; + +/* cl_khr_dx9_media_sharing */ +typedef void *cl_api_clCreateFromDX9MediaSurfaceKHR; +typedef void *cl_api_clEnqueueAcquireDX9MediaSurfacesKHR; +typedef void *cl_api_clEnqueueReleaseDX9MediaSurfacesKHR; +typedef void *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR; + +#endif + +/* OpenCL 1.1 */ + +#ifdef CL_VERSION_1_1 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetEventCallback)( + cl_event /* event */, cl_int /* command_exec_callback_type */, + void(CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), + void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateSubBuffer)( + cl_mem /* buffer */, cl_mem_flags /* flags */, + cl_buffer_create_type /* buffer_create_type */, + const void * /* buffer_create_info */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY +cl_int(CL_API_CALL *cl_api_clSetMemObjectDestructorCallback)( + cl_mem /* memobj */, + void(CL_CALLBACK * /*pfn_notify*/)(cl_mem /* memobj */, + void * /*user_data*/), + void * /*user_data */) CL_API_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateUserEvent)( + cl_context /* context */, + cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetUserEventStatus)( + cl_event /* event */, + cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; + +#else + +typedef void *cl_api_clSetEventCallback; +typedef void *cl_api_clCreateSubBuffer; +typedef void *cl_api_clSetMemObjectDestructorCallback; +typedef void *cl_api_clCreateUserEvent; +typedef void *cl_api_clSetUserEventStatus; + +#endif + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateSubDevicesEXT)( + cl_device_id in_device, + const cl_device_partition_property_ext *partition_properties, + cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainDeviceEXT)( + cl_device_id device) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseDeviceEXT)( + cl_device_id device) CL_API_SUFFIX__VERSION_1_0; + +/* cl_khr_egl_image */ +typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromEGLImageKHR)( + cl_context context, CLeglDisplayKHR display, CLeglImageKHR image, + cl_mem_flags flags, const cl_egl_image_properties_khr *properties, + cl_int *errcode_ret); + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueAcquireEGLObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseEGLObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); + +/* cl_khr_egl_event */ +typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateEventFromEGLSyncKHR)( + cl_context context, CLeglSyncKHR sync, CLeglDisplayKHR display, + cl_int *errcode_ret); + +#ifdef CL_VERSION_2_1 + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetDefaultDeviceCommandQueue)( + cl_context context, cl_device_id device, + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithIL)( + cl_context context, const void *il, size_t length, + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfo)( + cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info param_name, + size_t input_value_size, const void *input_value, size_t param_value_size, + void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_kernel(CL_API_CALL *cl_api_clCloneKernel)( + cl_kernel source_kernel, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMigrateMem)( + cl_command_queue command_queue, cl_uint num_svm_pointers, + const void **svm_pointers, const size_t *sizes, + cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceAndHostTimer)( + cl_device_id device, cl_ulong *device_timestamp, + cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetHostTimer)( + cl_device_id device, cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +#else + +typedef void *cl_api_clSetDefaultDeviceCommandQueue; +typedef void *cl_api_clCreateProgramWithIL; +typedef void *cl_api_clGetKernelSubGroupInfo; +typedef void *cl_api_clCloneKernel; +typedef void *cl_api_clEnqueueSVMMigrateMem; +typedef void *cl_api_clGetDeviceAndHostTimer; +typedef void *cl_api_clGetHostTimer; + +#endif + +/* Vendor dispatch table struture */ + +typedef struct _cl_icd_dispatch { + /* OpenCL 1.0 */ + cl_api_clGetPlatformIDs clGetPlatformIDs; + cl_api_clGetPlatformInfo clGetPlatformInfo; + cl_api_clGetDeviceIDs clGetDeviceIDs; + cl_api_clGetDeviceInfo clGetDeviceInfo; + cl_api_clCreateContext clCreateContext; + cl_api_clCreateContextFromType clCreateContextFromType; + cl_api_clRetainContext clRetainContext; + cl_api_clReleaseContext clReleaseContext; + cl_api_clGetContextInfo clGetContextInfo; + cl_api_clCreateCommandQueue clCreateCommandQueue; + cl_api_clRetainCommandQueue clRetainCommandQueue; + cl_api_clReleaseCommandQueue clReleaseCommandQueue; + cl_api_clGetCommandQueueInfo clGetCommandQueueInfo; + cl_api_clSetCommandQueueProperty clSetCommandQueueProperty; + cl_api_clCreateBuffer clCreateBuffer; + cl_api_clCreateImage2D clCreateImage2D; + cl_api_clCreateImage3D clCreateImage3D; + cl_api_clRetainMemObject clRetainMemObject; + cl_api_clReleaseMemObject clReleaseMemObject; + cl_api_clGetSupportedImageFormats clGetSupportedImageFormats; + cl_api_clGetMemObjectInfo clGetMemObjectInfo; + cl_api_clGetImageInfo clGetImageInfo; + cl_api_clCreateSampler clCreateSampler; + cl_api_clRetainSampler clRetainSampler; + cl_api_clReleaseSampler clReleaseSampler; + cl_api_clGetSamplerInfo clGetSamplerInfo; + cl_api_clCreateProgramWithSource clCreateProgramWithSource; + cl_api_clCreateProgramWithBinary clCreateProgramWithBinary; + cl_api_clRetainProgram clRetainProgram; + cl_api_clReleaseProgram clReleaseProgram; + cl_api_clBuildProgram clBuildProgram; + cl_api_clUnloadCompiler clUnloadCompiler; + cl_api_clGetProgramInfo clGetProgramInfo; + cl_api_clGetProgramBuildInfo clGetProgramBuildInfo; + cl_api_clCreateKernel clCreateKernel; + cl_api_clCreateKernelsInProgram clCreateKernelsInProgram; + cl_api_clRetainKernel clRetainKernel; + cl_api_clReleaseKernel clReleaseKernel; + cl_api_clSetKernelArg clSetKernelArg; + cl_api_clGetKernelInfo clGetKernelInfo; + cl_api_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo; + cl_api_clWaitForEvents clWaitForEvents; + cl_api_clGetEventInfo clGetEventInfo; + cl_api_clRetainEvent clRetainEvent; + cl_api_clReleaseEvent clReleaseEvent; + cl_api_clGetEventProfilingInfo clGetEventProfilingInfo; + cl_api_clFlush clFlush; + cl_api_clFinish clFinish; + cl_api_clEnqueueReadBuffer clEnqueueReadBuffer; + cl_api_clEnqueueWriteBuffer clEnqueueWriteBuffer; + cl_api_clEnqueueCopyBuffer clEnqueueCopyBuffer; + cl_api_clEnqueueReadImage clEnqueueReadImage; + cl_api_clEnqueueWriteImage clEnqueueWriteImage; + cl_api_clEnqueueCopyImage clEnqueueCopyImage; + cl_api_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer; + cl_api_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage; + cl_api_clEnqueueMapBuffer clEnqueueMapBuffer; + cl_api_clEnqueueMapImage clEnqueueMapImage; + cl_api_clEnqueueUnmapMemObject clEnqueueUnmapMemObject; + cl_api_clEnqueueNDRangeKernel clEnqueueNDRangeKernel; + cl_api_clEnqueueTask clEnqueueTask; + cl_api_clEnqueueNativeKernel clEnqueueNativeKernel; + cl_api_clEnqueueMarker clEnqueueMarker; + cl_api_clEnqueueWaitForEvents clEnqueueWaitForEvents; + cl_api_clEnqueueBarrier clEnqueueBarrier; + cl_api_clGetExtensionFunctionAddress clGetExtensionFunctionAddress; + cl_api_clCreateFromGLBuffer clCreateFromGLBuffer; + cl_api_clCreateFromGLTexture2D clCreateFromGLTexture2D; + cl_api_clCreateFromGLTexture3D clCreateFromGLTexture3D; + cl_api_clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer; + cl_api_clGetGLObjectInfo clGetGLObjectInfo; + cl_api_clGetGLTextureInfo clGetGLTextureInfo; + cl_api_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects; + cl_api_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects; + cl_api_clGetGLContextInfoKHR clGetGLContextInfoKHR; + + /* cl_khr_d3d10_sharing */ + cl_api_clGetDeviceIDsFromD3D10KHR clGetDeviceIDsFromD3D10KHR; + cl_api_clCreateFromD3D10BufferKHR clCreateFromD3D10BufferKHR; + cl_api_clCreateFromD3D10Texture2DKHR clCreateFromD3D10Texture2DKHR; + cl_api_clCreateFromD3D10Texture3DKHR clCreateFromD3D10Texture3DKHR; + cl_api_clEnqueueAcquireD3D10ObjectsKHR clEnqueueAcquireD3D10ObjectsKHR; + cl_api_clEnqueueReleaseD3D10ObjectsKHR clEnqueueReleaseD3D10ObjectsKHR; + + /* OpenCL 1.1 */ + cl_api_clSetEventCallback clSetEventCallback; + cl_api_clCreateSubBuffer clCreateSubBuffer; + cl_api_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback; + cl_api_clCreateUserEvent clCreateUserEvent; + cl_api_clSetUserEventStatus clSetUserEventStatus; + cl_api_clEnqueueReadBufferRect clEnqueueReadBufferRect; + cl_api_clEnqueueWriteBufferRect clEnqueueWriteBufferRect; + cl_api_clEnqueueCopyBufferRect clEnqueueCopyBufferRect; + + /* cl_ext_device_fission */ + cl_api_clCreateSubDevicesEXT clCreateSubDevicesEXT; + cl_api_clRetainDeviceEXT clRetainDeviceEXT; + cl_api_clReleaseDeviceEXT clReleaseDeviceEXT; + + /* cl_khr_gl_event */ + cl_api_clCreateEventFromGLsyncKHR clCreateEventFromGLsyncKHR; + + /* OpenCL 1.2 */ + cl_api_clCreateSubDevices clCreateSubDevices; + cl_api_clRetainDevice clRetainDevice; + cl_api_clReleaseDevice clReleaseDevice; + cl_api_clCreateImage clCreateImage; + cl_api_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels; + cl_api_clCompileProgram clCompileProgram; + cl_api_clLinkProgram clLinkProgram; + cl_api_clUnloadPlatformCompiler clUnloadPlatformCompiler; + cl_api_clGetKernelArgInfo clGetKernelArgInfo; + cl_api_clEnqueueFillBuffer clEnqueueFillBuffer; + cl_api_clEnqueueFillImage clEnqueueFillImage; + cl_api_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects; + cl_api_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList; + cl_api_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList; + cl_api_clGetExtensionFunctionAddressForPlatform + clGetExtensionFunctionAddressForPlatform; + cl_api_clCreateFromGLTexture clCreateFromGLTexture; + + /* cl_khr_d3d11_sharing */ + cl_api_clGetDeviceIDsFromD3D11KHR clGetDeviceIDsFromD3D11KHR; + cl_api_clCreateFromD3D11BufferKHR clCreateFromD3D11BufferKHR; + cl_api_clCreateFromD3D11Texture2DKHR clCreateFromD3D11Texture2DKHR; + cl_api_clCreateFromD3D11Texture3DKHR clCreateFromD3D11Texture3DKHR; + cl_api_clCreateFromDX9MediaSurfaceKHR clCreateFromDX9MediaSurfaceKHR; + cl_api_clEnqueueAcquireD3D11ObjectsKHR clEnqueueAcquireD3D11ObjectsKHR; + cl_api_clEnqueueReleaseD3D11ObjectsKHR clEnqueueReleaseD3D11ObjectsKHR; + + /* cl_khr_dx9_media_sharing */ + cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR + clGetDeviceIDsFromDX9MediaAdapterKHR; + cl_api_clEnqueueAcquireDX9MediaSurfacesKHR + clEnqueueAcquireDX9MediaSurfacesKHR; + cl_api_clEnqueueReleaseDX9MediaSurfacesKHR + clEnqueueReleaseDX9MediaSurfacesKHR; + + /* cl_khr_egl_image */ + cl_api_clCreateFromEGLImageKHR clCreateFromEGLImageKHR; + cl_api_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR; + cl_api_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR; + + /* cl_khr_egl_event */ + cl_api_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR; + + /* OpenCL 2.0 */ + cl_api_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties; + cl_api_clCreatePipe clCreatePipe; + cl_api_clGetPipeInfo clGetPipeInfo; + cl_api_clSVMAlloc clSVMAlloc; + cl_api_clSVMFree clSVMFree; + cl_api_clEnqueueSVMFree clEnqueueSVMFree; + cl_api_clEnqueueSVMMemcpy clEnqueueSVMMemcpy; + cl_api_clEnqueueSVMMemFill clEnqueueSVMMemFill; + cl_api_clEnqueueSVMMap clEnqueueSVMMap; + cl_api_clEnqueueSVMUnmap clEnqueueSVMUnmap; + cl_api_clCreateSamplerWithProperties clCreateSamplerWithProperties; + cl_api_clSetKernelArgSVMPointer clSetKernelArgSVMPointer; + cl_api_clSetKernelExecInfo clSetKernelExecInfo; + + /* cl_khr_sub_groups */ + cl_api_clGetKernelSubGroupInfoKHR clGetKernelSubGroupInfoKHR; + + /* OpenCL 2.1 */ + cl_api_clCloneKernel clCloneKernel; + cl_api_clCreateProgramWithIL clCreateProgramWithIL; + cl_api_clEnqueueSVMMigrateMem clEnqueueSVMMigrateMem; + cl_api_clGetDeviceAndHostTimer clGetDeviceAndHostTimer; + cl_api_clGetHostTimer clGetHostTimer; + cl_api_clGetKernelSubGroupInfo clGetKernelSubGroupInfo; + cl_api_clSetDefaultDeviceCommandQueue clSetDefaultDeviceCommandQueue; + + /* OpenCL 2.2 */ + cl_api_clSetProgramReleaseCallback clSetProgramReleaseCallback; + cl_api_clSetProgramSpecializationConstant clSetProgramSpecializationConstant; + + /* OpenCL 3.0 */ + cl_api_clCreateBufferWithProperties clCreateBufferWithProperties; + cl_api_clCreateImageWithProperties clCreateImageWithProperties; + +} cl_icd_dispatch; + +#ifdef __cplusplus +} +#endif + +#endif /* #ifndef OPENCL_CL_ICD_H */ diff --git a/CL/cl_platform.h b/CL/cl_platform.h new file mode 100644 index 0000000..1bd7d4b --- /dev/null +++ b/CL/cl_platform.h @@ -0,0 +1,1384 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __CL_PLATFORM_H +#define __CL_PLATFORM_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_WIN32) + #define CL_API_ENTRY + #define CL_API_CALL __stdcall + #define CL_CALLBACK __stdcall +#else + #define CL_API_ENTRY + #define CL_API_CALL + #define CL_CALLBACK +#endif + +/* + * Deprecation flags refer to the last version of the header in which the + * feature was not deprecated. + * + * E.g. VERSION_1_1_DEPRECATED means the feature is present in 1.1 without + * deprecation but is deprecated in versions later than 1.1. + */ + +#define CL_EXTENSION_WEAK_LINK +#define CL_API_SUFFIX__VERSION_1_0 +#define CL_EXT_SUFFIX__VERSION_1_0 +#define CL_API_SUFFIX__VERSION_1_1 +#define CL_EXT_SUFFIX__VERSION_1_1 +#define CL_API_SUFFIX__VERSION_1_2 +#define CL_EXT_SUFFIX__VERSION_1_2 +#define CL_API_SUFFIX__VERSION_2_0 +#define CL_EXT_SUFFIX__VERSION_2_0 +#define CL_API_SUFFIX__VERSION_2_1 +#define CL_EXT_SUFFIX__VERSION_2_1 +#define CL_API_SUFFIX__VERSION_2_2 +#define CL_EXT_SUFFIX__VERSION_2_2 +#define CL_API_SUFFIX__VERSION_3_0 +#define CL_EXT_SUFFIX__VERSION_3_0 +#define CL_API_SUFFIX__EXPERIMENTAL +#define CL_EXT_SUFFIX__EXPERIMENTAL + + +#ifdef __GNUC__ + #define CL_EXT_SUFFIX_DEPRECATED __attribute__((deprecated)) + #define CL_EXT_PREFIX_DEPRECATED +#elif defined(_WIN32) + #define CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX_DEPRECATED __declspec(deprecated) +#else + #define CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS + #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #endif + +#ifdef CL_USE_DEPRECATED_OPENCL_2_0_APIS + #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_2_1_APIS + #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_2_2_APIS + #define CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_2_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_2_DEPRECATED CL_EXT_PREFIX_DEPRECATED +#endif + +#if (defined (_WIN32) && defined(_MSC_VER)) + +/* scalar types */ +typedef signed __int8 cl_char; +typedef unsigned __int8 cl_uchar; +typedef signed __int16 cl_short; +typedef unsigned __int16 cl_ushort; +typedef signed __int32 cl_int; +typedef unsigned __int32 cl_uint; +typedef signed __int64 cl_long; +typedef unsigned __int64 cl_ulong; + +typedef unsigned __int16 cl_half; +typedef float cl_float; +typedef double cl_double; + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 340282346638528859811704183484516925440.0f +#define CL_FLT_MIN 1.175494350822287507969e-38f +#define CL_FLT_EPSILON 1.1920928955078125e-7f + +#define CL_HALF_DIG 3 +#define CL_HALF_MANT_DIG 11 +#define CL_HALF_MAX_10_EXP +4 +#define CL_HALF_MAX_EXP +16 +#define CL_HALF_MIN_10_EXP -4 +#define CL_HALF_MIN_EXP -13 +#define CL_HALF_RADIX 2 +#define CL_HALF_MAX 65504.0f +#define CL_HALF_MIN 6.103515625e-05f +#define CL_HALF_EPSILON 9.765625e-04f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 1.7976931348623158e+308 +#define CL_DBL_MIN 2.225073858507201383090e-308 +#define CL_DBL_EPSILON 2.220446049250313080847e-16 + +#define CL_M_E 2.7182818284590452354 +#define CL_M_LOG2E 1.4426950408889634074 +#define CL_M_LOG10E 0.43429448190325182765 +#define CL_M_LN2 0.69314718055994530942 +#define CL_M_LN10 2.30258509299404568402 +#define CL_M_PI 3.14159265358979323846 +#define CL_M_PI_2 1.57079632679489661923 +#define CL_M_PI_4 0.78539816339744830962 +#define CL_M_1_PI 0.31830988618379067154 +#define CL_M_2_PI 0.63661977236758134308 +#define CL_M_2_SQRTPI 1.12837916709551257390 +#define CL_M_SQRT2 1.41421356237309504880 +#define CL_M_SQRT1_2 0.70710678118654752440 + +#define CL_M_E_F 2.718281828f +#define CL_M_LOG2E_F 1.442695041f +#define CL_M_LOG10E_F 0.434294482f +#define CL_M_LN2_F 0.693147181f +#define CL_M_LN10_F 2.302585093f +#define CL_M_PI_F 3.141592654f +#define CL_M_PI_2_F 1.570796327f +#define CL_M_PI_4_F 0.785398163f +#define CL_M_1_PI_F 0.318309886f +#define CL_M_2_PI_F 0.636619772f +#define CL_M_2_SQRTPI_F 1.128379167f +#define CL_M_SQRT2_F 1.414213562f +#define CL_M_SQRT1_2_F 0.707106781f + +#define CL_NAN (CL_INFINITY - CL_INFINITY) +#define CL_HUGE_VALF ((cl_float) 1e50) +#define CL_HUGE_VAL ((cl_double) 1e500) +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#else + +#include + +/* scalar types */ +typedef int8_t cl_char; +typedef uint8_t cl_uchar; +typedef int16_t cl_short; +typedef uint16_t cl_ushort; +typedef int32_t cl_int; +typedef uint32_t cl_uint; +typedef int64_t cl_long; +typedef uint64_t cl_ulong; + +typedef uint16_t cl_half; +typedef float cl_float; +typedef double cl_double; + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 340282346638528859811704183484516925440.0f +#define CL_FLT_MIN 1.175494350822287507969e-38f +#define CL_FLT_EPSILON 1.1920928955078125e-7f + +#define CL_HALF_DIG 3 +#define CL_HALF_MANT_DIG 11 +#define CL_HALF_MAX_10_EXP +4 +#define CL_HALF_MAX_EXP +16 +#define CL_HALF_MIN_10_EXP -4 +#define CL_HALF_MIN_EXP -13 +#define CL_HALF_RADIX 2 +#define CL_HALF_MAX 65504.0f +#define CL_HALF_MIN 6.103515625e-05f +#define CL_HALF_EPSILON 9.765625e-04f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0 +#define CL_DBL_MIN 2.225073858507201383090e-308 +#define CL_DBL_EPSILON 2.220446049250313080847e-16 + +#define CL_M_E 2.7182818284590452354 +#define CL_M_LOG2E 1.4426950408889634074 +#define CL_M_LOG10E 0.43429448190325182765 +#define CL_M_LN2 0.69314718055994530942 +#define CL_M_LN10 2.30258509299404568402 +#define CL_M_PI 3.14159265358979323846 +#define CL_M_PI_2 1.57079632679489661923 +#define CL_M_PI_4 0.78539816339744830962 +#define CL_M_1_PI 0.31830988618379067154 +#define CL_M_2_PI 0.63661977236758134308 +#define CL_M_2_SQRTPI 1.12837916709551257390 +#define CL_M_SQRT2 1.41421356237309504880 +#define CL_M_SQRT1_2 0.70710678118654752440 + +#define CL_M_E_F 2.718281828f +#define CL_M_LOG2E_F 1.442695041f +#define CL_M_LOG10E_F 0.434294482f +#define CL_M_LN2_F 0.693147181f +#define CL_M_LN10_F 2.302585093f +#define CL_M_PI_F 3.141592654f +#define CL_M_PI_2_F 1.570796327f +#define CL_M_PI_4_F 0.785398163f +#define CL_M_1_PI_F 0.318309886f +#define CL_M_2_PI_F 0.636619772f +#define CL_M_2_SQRTPI_F 1.128379167f +#define CL_M_SQRT2_F 1.414213562f +#define CL_M_SQRT1_2_F 0.707106781f + +#if defined( __GNUC__ ) + #define CL_HUGE_VALF __builtin_huge_valf() + #define CL_HUGE_VAL __builtin_huge_val() + #define CL_NAN __builtin_nanf( "" ) +#else + #define CL_HUGE_VALF ((cl_float) 1e50) + #define CL_HUGE_VAL ((cl_double) 1e500) + float nanf( const char * ); + #define CL_NAN nanf( "" ) +#endif +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#endif + +#include + +/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */ +typedef unsigned int cl_GLuint; +typedef int cl_GLint; +typedef unsigned int cl_GLenum; + +/* + * Vector types + * + * Note: OpenCL requires that all types be naturally aligned. + * This means that vector types must be naturally aligned. + * For example, a vector of four floats must be aligned to + * a 16 byte boundary (calculated as 4 * the natural 4-byte + * alignment of the float). The alignment qualifiers here + * will only function properly if your compiler supports them + * and if you don't actively work to defeat them. For example, + * in order for a cl_float4 to be 16 byte aligned in a struct, + * the start of the struct must itself be 16-byte aligned. + * + * Maintaining proper alignment is the user's responsibility. + */ + +/* Define basic vector types */ +#if defined( __VEC__ ) + #include /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */ + typedef __vector unsigned char __cl_uchar16; + typedef __vector signed char __cl_char16; + typedef __vector unsigned short __cl_ushort8; + typedef __vector signed short __cl_short8; + typedef __vector unsigned int __cl_uint4; + typedef __vector signed int __cl_int4; + typedef __vector float __cl_float4; + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_UINT4__ 1 + #define __CL_INT4__ 1 + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef float __cl_float4 __attribute__((vector_size(16))); + #else + typedef __m128 __cl_float4; + #endif + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE2__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16))); + typedef cl_char __cl_char16 __attribute__((vector_size(16))); + typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16))); + typedef cl_short __cl_short8 __attribute__((vector_size(16))); + typedef cl_uint __cl_uint4 __attribute__((vector_size(16))); + typedef cl_int __cl_int4 __attribute__((vector_size(16))); + typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16))); + typedef cl_long __cl_long2 __attribute__((vector_size(16))); + typedef cl_double __cl_double2 __attribute__((vector_size(16))); + #else + typedef __m128i __cl_uchar16; + typedef __m128i __cl_char16; + typedef __m128i __cl_ushort8; + typedef __m128i __cl_short8; + typedef __m128i __cl_uint4; + typedef __m128i __cl_int4; + typedef __m128i __cl_ulong2; + typedef __m128i __cl_long2; + typedef __m128d __cl_double2; + #endif + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_INT4__ 1 + #define __CL_UINT4__ 1 + #define __CL_ULONG2__ 1 + #define __CL_LONG2__ 1 + #define __CL_DOUBLE2__ 1 +#endif + +#if defined( __MMX__ ) + #include + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8))); + typedef cl_char __cl_char8 __attribute__((vector_size(8))); + typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8))); + typedef cl_short __cl_short4 __attribute__((vector_size(8))); + typedef cl_uint __cl_uint2 __attribute__((vector_size(8))); + typedef cl_int __cl_int2 __attribute__((vector_size(8))); + typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8))); + typedef cl_long __cl_long1 __attribute__((vector_size(8))); + typedef cl_float __cl_float2 __attribute__((vector_size(8))); + #else + typedef __m64 __cl_uchar8; + typedef __m64 __cl_char8; + typedef __m64 __cl_ushort4; + typedef __m64 __cl_short4; + typedef __m64 __cl_uint2; + typedef __m64 __cl_int2; + typedef __m64 __cl_ulong1; + typedef __m64 __cl_long1; + typedef __m64 __cl_float2; + #endif + #define __CL_UCHAR8__ 1 + #define __CL_CHAR8__ 1 + #define __CL_USHORT4__ 1 + #define __CL_SHORT4__ 1 + #define __CL_INT2__ 1 + #define __CL_UINT2__ 1 + #define __CL_ULONG1__ 1 + #define __CL_LONG1__ 1 + #define __CL_FLOAT2__ 1 +#endif + +#if defined( __AVX__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef cl_float __cl_float8 __attribute__((vector_size(32))); + typedef cl_double __cl_double4 __attribute__((vector_size(32))); + #else + typedef __m256 __cl_float8; + typedef __m256d __cl_double4; + #endif + #define __CL_FLOAT8__ 1 + #define __CL_DOUBLE4__ 1 +#endif + +/* Define capabilities for anonymous struct members. */ +#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ +#elif defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ __extension__ +#elif defined( _WIN32) && defined(_MSC_VER) + #if _MSC_VER >= 1500 + /* Microsoft Developer Studio 2008 supports anonymous structs, but + * complains by default. */ + #define __CL_HAS_ANON_STRUCT__ 1 + #define __CL_ANON_STRUCT__ + /* Disable warning C4201: nonstandard extension used : nameless + * struct/union */ + #pragma warning( push ) + #pragma warning( disable : 4201 ) + #endif +#else +#define __CL_HAS_ANON_STRUCT__ 0 +#define __CL_ANON_STRUCT__ +#endif + +/* Define alignment keys */ +#if defined( __GNUC__ ) + #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) +#elif defined( _WIN32) && (_MSC_VER) + /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */ + /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */ + /* #include */ + /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */ + #define CL_ALIGNED(_x) +#else + #warning Need to implement some method to align data here + #define CL_ALIGNED(_x) +#endif + +/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */ +#if __CL_HAS_ANON_STRUCT__ + /* .xyzw and .s0123...{f|F} are supported */ + #define CL_HAS_NAMED_VECTOR_FIELDS 1 + /* .hi and .lo are supported */ + #define CL_HAS_HI_LO_VECTOR_FIELDS 1 +#endif + +/* Define cl_vector types */ + +/* ---- cl_charn ---- */ +typedef union +{ + cl_char CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_char lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2; +#endif +}cl_char2; + +typedef union +{ + cl_char CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_char2 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[2]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4; +#endif +}cl_char4; + +/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */ +typedef cl_char4 cl_char3; + +typedef union +{ + cl_char CL_ALIGNED(8) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_char4 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[4]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[2]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8; +#endif +}cl_char8; + +typedef union +{ + cl_char CL_ALIGNED(16) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_char8 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[8]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[4]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8[2]; +#endif +#if defined( __CL_CHAR16__ ) + __cl_char16 v16; +#endif +}cl_char16; + + +/* ---- cl_ucharn ---- */ +typedef union +{ + cl_uchar CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_uchar lo, hi; }; +#endif +#if defined( __cl_uchar2__) + __cl_uchar2 v2; +#endif +}cl_uchar2; + +typedef union +{ + cl_uchar CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_uchar2 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[2]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4; +#endif +}cl_uchar4; + +/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */ +typedef cl_uchar4 cl_uchar3; + +typedef union +{ + cl_uchar CL_ALIGNED(8) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_uchar4 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[4]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[2]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8; +#endif +}cl_uchar8; + +typedef union +{ + cl_uchar CL_ALIGNED(16) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_uchar8 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[8]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[4]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8[2]; +#endif +#if defined( __CL_UCHAR16__ ) + __cl_uchar16 v16; +#endif +}cl_uchar16; + + +/* ---- cl_shortn ---- */ +typedef union +{ + cl_short CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_short lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2; +#endif +}cl_short2; + +typedef union +{ + cl_short CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_short2 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[2]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4; +#endif +}cl_short4; + +/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */ +typedef cl_short4 cl_short3; + +typedef union +{ + cl_short CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_short4 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[4]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[2]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8; +#endif +}cl_short8; + +typedef union +{ + cl_short CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_short8 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[8]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[4]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8[2]; +#endif +#if defined( __CL_SHORT16__ ) + __cl_short16 v16; +#endif +}cl_short16; + + +/* ---- cl_ushortn ---- */ +typedef union +{ + cl_ushort CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_ushort lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2; +#endif +}cl_ushort2; + +typedef union +{ + cl_ushort CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_ushort2 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[2]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4; +#endif +}cl_ushort4; + +/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */ +typedef cl_ushort4 cl_ushort3; + +typedef union +{ + cl_ushort CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_ushort4 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[4]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[2]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8; +#endif +}cl_ushort8; + +typedef union +{ + cl_ushort CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_ushort8 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[8]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[4]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8[2]; +#endif +#if defined( __CL_USHORT16__ ) + __cl_ushort16 v16; +#endif +}cl_ushort16; + + +/* ---- cl_halfn ---- */ +typedef union +{ + cl_half CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_half lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2; +#endif +}cl_half2; + +typedef union +{ + cl_half CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_half2 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[2]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4; +#endif +}cl_half4; + +/* cl_half3 is identical in size, alignment and behavior to cl_half4. See section 6.1.5. */ +typedef cl_half4 cl_half3; + +typedef union +{ + cl_half CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_half4 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[4]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4[2]; +#endif +#if defined( __CL_HALF8__ ) + __cl_half8 v8; +#endif +}cl_half8; + +typedef union +{ + cl_half CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_half8 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[8]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4[4]; +#endif +#if defined( __CL_HALF8__ ) + __cl_half8 v8[2]; +#endif +#if defined( __CL_HALF16__ ) + __cl_half16 v16; +#endif +}cl_half16; + +/* ---- cl_intn ---- */ +typedef union +{ + cl_int CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_int lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2; +#endif +}cl_int2; + +typedef union +{ + cl_int CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_int2 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[2]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4; +#endif +}cl_int4; + +/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */ +typedef cl_int4 cl_int3; + +typedef union +{ + cl_int CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_int4 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[4]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[2]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8; +#endif +}cl_int8; + +typedef union +{ + cl_int CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_int8 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[8]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[4]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8[2]; +#endif +#if defined( __CL_INT16__ ) + __cl_int16 v16; +#endif +}cl_int16; + + +/* ---- cl_uintn ---- */ +typedef union +{ + cl_uint CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_uint lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2; +#endif +}cl_uint2; + +typedef union +{ + cl_uint CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_uint2 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[2]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4; +#endif +}cl_uint4; + +/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */ +typedef cl_uint4 cl_uint3; + +typedef union +{ + cl_uint CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_uint4 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[4]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[2]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8; +#endif +}cl_uint8; + +typedef union +{ + cl_uint CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_uint8 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[8]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[4]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8[2]; +#endif +#if defined( __CL_UINT16__ ) + __cl_uint16 v16; +#endif +}cl_uint16; + +/* ---- cl_longn ---- */ +typedef union +{ + cl_long CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_long lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2; +#endif +}cl_long2; + +typedef union +{ + cl_long CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_long2 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[2]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4; +#endif +}cl_long4; + +/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */ +typedef cl_long4 cl_long3; + +typedef union +{ + cl_long CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_long4 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[4]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[2]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8; +#endif +}cl_long8; + +typedef union +{ + cl_long CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_long8 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[8]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[4]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8[2]; +#endif +#if defined( __CL_LONG16__ ) + __cl_long16 v16; +#endif +}cl_long16; + + +/* ---- cl_ulongn ---- */ +typedef union +{ + cl_ulong CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_ulong lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2; +#endif +}cl_ulong2; + +typedef union +{ + cl_ulong CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_ulong2 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[2]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4; +#endif +}cl_ulong4; + +/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */ +typedef cl_ulong4 cl_ulong3; + +typedef union +{ + cl_ulong CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_ulong4 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[4]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[2]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8; +#endif +}cl_ulong8; + +typedef union +{ + cl_ulong CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_ulong8 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[8]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[4]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8[2]; +#endif +#if defined( __CL_ULONG16__ ) + __cl_ulong16 v16; +#endif +}cl_ulong16; + + +/* --- cl_floatn ---- */ + +typedef union +{ + cl_float CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_float lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2; +#endif +}cl_float2; + +typedef union +{ + cl_float CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_float2 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[2]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4; +#endif +}cl_float4; + +/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */ +typedef cl_float4 cl_float3; + +typedef union +{ + cl_float CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_float4 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[4]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[2]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8; +#endif +}cl_float8; + +typedef union +{ + cl_float CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_float8 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[8]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[4]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8[2]; +#endif +#if defined( __CL_FLOAT16__ ) + __cl_float16 v16; +#endif +}cl_float16; + +/* --- cl_doublen ---- */ + +typedef union +{ + cl_double CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_double lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2; +#endif +}cl_double2; + +typedef union +{ + cl_double CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_double2 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[2]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4; +#endif +}cl_double4; + +/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */ +typedef cl_double4 cl_double3; + +typedef union +{ + cl_double CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_double4 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[4]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[2]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8; +#endif +}cl_double8; + +typedef union +{ + cl_double CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_double8 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[8]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[4]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8[2]; +#endif +#if defined( __CL_DOUBLE16__ ) + __cl_double16 v16; +#endif +}cl_double16; + +/* Macro to facilitate debugging + * Usage: + * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. + * The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \" + * Each line thereafter of OpenCL C source must end with: \n\ + * The last line ends in "; + * + * Example: + * + * const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\ + * kernel void foo( int a, float * b ) \n\ + * { \n\ + * // my comment \n\ + * *b[ get_global_id(0)] = a; \n\ + * } \n\ + * "; + * + * This should correctly set up the line, (column) and file information for your source + * string so you can do source level debugging. + */ +#define __CL_STRINGIFY( _x ) # _x +#define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x ) +#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" + +#ifdef __cplusplus +} +#endif + +#undef __CL_HAS_ANON_STRUCT__ +#undef __CL_ANON_STRUCT__ +#if defined( _WIN32) && defined(_MSC_VER) + #if _MSC_VER >=1500 + #pragma warning( pop ) + #endif +#endif + +#endif /* __CL_PLATFORM_H */ diff --git a/CL/cl_va_api_media_sharing_intel.h b/CL/cl_va_api_media_sharing_intel.h new file mode 100644 index 0000000..0e7cd4d --- /dev/null +++ b/CL/cl_va_api_media_sharing_intel.h @@ -0,0 +1,160 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +/*****************************************************************************\ + +Copyright (c) 2013-2019 Intel Corporation All Rights Reserved. + +THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE +MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +File Name: cl_va_api_media_sharing_intel.h + +Abstract: + +Notes: + +\*****************************************************************************/ + + +#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H +#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/****************************************** +* cl_intel_va_api_media_sharing extension * +*******************************************/ + +#define cl_intel_va_api_media_sharing 1 + +/* error codes */ +#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098 +#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099 +#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100 +#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101 + +/* cl_va_api_device_source_intel */ +#define CL_VA_API_DISPLAY_INTEL 0x4094 + +/* cl_va_api_device_set_intel */ +#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095 +#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096 + +/* cl_context_info */ +#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097 + +/* cl_mem_info */ +#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098 + +/* cl_image_info */ +#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099 + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A +#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B + +typedef cl_uint cl_va_api_device_source_intel; +typedef cl_uint cl_va_api_device_set_intel; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDsFromVA_APIMediaAdapterINTEL( + cl_platform_id platform, + cl_va_api_device_source_intel media_adapter_type, + void* media_adapter, + cl_va_api_device_set_intel media_adapter_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)( + cl_platform_id platform, + cl_va_api_device_source_intel media_adapter_type, + void* media_adapter, + cl_va_api_device_set_intel media_adapter_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromVA_APIMediaSurfaceINTEL( + cl_context context, + cl_mem_flags flags, + VASurfaceID* surface, + cl_uint plane, + cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)( + cl_context context, + cl_mem_flags flags, + VASurfaceID* surface, + cl_uint plane, + cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireVA_APIMediaSurfacesINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseVA_APIMediaSurfacesINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */ + diff --git a/CL/cl_version.h b/CL/cl_version.h new file mode 100644 index 0000000..f38280a --- /dev/null +++ b/CL/cl_version.h @@ -0,0 +1,81 @@ +/******************************************************************************* + * Copyright (c) 2018-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __CL_VERSION_H +#define __CL_VERSION_H + +/* Detect which version to target */ +#if !defined(CL_TARGET_OPENCL_VERSION) +#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)") +#define CL_TARGET_OPENCL_VERSION 220 +#endif +#if CL_TARGET_OPENCL_VERSION != 100 && \ + CL_TARGET_OPENCL_VERSION != 110 && \ + CL_TARGET_OPENCL_VERSION != 120 && \ + CL_TARGET_OPENCL_VERSION != 200 && \ + CL_TARGET_OPENCL_VERSION != 210 && \ + CL_TARGET_OPENCL_VERSION != 220 && \ + CL_TARGET_OPENCL_VERSION != 300 +#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220, 300). Defaulting to 220 (OpenCL 2.2)") +#undef CL_TARGET_OPENCL_VERSION +#define CL_TARGET_OPENCL_VERSION 220 +#endif + + +/* OpenCL Version */ +#if CL_TARGET_OPENCL_VERSION >= 300 && !defined(CL_VERSION_3_0) +#define CL_VERSION_3_0 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2) +#define CL_VERSION_2_2 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1) +#define CL_VERSION_2_1 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0) +#define CL_VERSION_2_0 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2) +#define CL_VERSION_1_2 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1) +#define CL_VERSION_1_1 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0) +#define CL_VERSION_1_0 1 +#endif + +/* Allow deprecated APIs for older OpenCL versions. */ +#if CL_TARGET_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS) +#define CL_USE_DEPRECATED_OPENCL_2_2_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) +#define CL_USE_DEPRECATED_OPENCL_2_1_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) +#define CL_USE_DEPRECATED_OPENCL_2_0_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_0_APIS +#endif + +#endif /* __CL_VERSION_H */ diff --git a/CL/opencl.h b/CL/opencl.h new file mode 100644 index 0000000..1c4e10c --- /dev/null +++ b/CL/opencl.h @@ -0,0 +1,33 @@ +/******************************************************************************* + * Copyright (c) 2008-2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ + +#ifndef __OPENCL_H +#define __OPENCL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_H */ diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..3e20485 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,184 @@ +cmake_minimum_required(VERSION 3.13 FATAL_ERROR) + +project(BabelStream VERSION 3.5 LANGUAGES CXX) + +#set(CMAKE_VERBOSE_MAKEFILE ON) + +# some nicer defaults for standard C++ +set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + + +#set(MODEL SYCL) +#set(SYCL_COMPILER COMPUTECPP) +#set(SYCL_COMPILER_DIR /home/tom/Desktop/computecpp_archive/ComputeCpp-CE-2.3.0-x86_64-linux-gnu) +#set(MODEL RAJA) +#set(RAJA_IN_TREE /home/tom/Downloads/RAJA-v0.13.0/) +#set(ENABLE_CUDA ON) +#set(TARGET NVIDIA) +#set(CUDA_TOOLKIT_ROOT_DIR /opt/cuda-11.2) +#set(CUDA_ARCH sm_70) +#set(BLT_DIR /home/tom/Downloads/blt-0.3.6/) + +#set(MODEL STD) +#set(ARCH cc70) +#set(CXX_EXTRA_FLAGS -v) + +#set(MODEL CUDA) +#set(ARCH sm_70) +#set(CMAKE_CUDA_COMPILER /opt/cuda-11.2/bin/nvcc) + +#set(MODEL OCL) +#set(OpenCL_LIBRARY /opt/rocm-4.0.0/opencl/lib/libOpenCL.so) +#set(OpenCL_INCLUDE_DIR /opt/rocm-4.0.0/opencl/lib) +#set(RELEASE_FLAGS -Ofast) +#set(CXX_EXTRA_FLAGS -O2) + +#set(CMAKE_CXX_COMPILER /usr/lib/aomp/bin/clang++) +#set(MODEL OMP) +##set(OFFLOAD "AMD:gfx803") +#set(OFFLOAD "NVIDIA:sm_35") +#set(CXX_EXTRA_FLAGS --cuda-path=/opt/cuda-10.2/) + +#set(OFFLOAD "AMD:_70") +#set(CXX_EXTRA_FLAGS --cuda-path=/opt/cuda-10.2/ --gcc-toolchain=/home/tom/spack/opt/spack/linux-fedora33-zen2/gcc-10.2.1/gcc-8.3.0-latmjo2hl2yv53255xkwko7k3y7bx2vv) +#set(CXX_EXTRA_LINKER_FLAGS ) +#set(MODEL HIP) + +#set(MODEL KOKKOS) +#set(KOKKOS_IN_TREE /home/tom/Downloads/kokkos-3.3.00/) + +# the final executable name +set(EXE_NAME babelstream) + +# select default build type +set(CMAKE_BUILD_TYPE "Release") + +if (NOT CMAKE_BUILD_TYPE) + message("No CMAKE_BUILD_TYPE specified, defaulting to 'Release'") + set(CMAKE_BUILD_TYPE "Release") +endif () + +string(TOUPPER ${CMAKE_BUILD_TYPE} BUILD_TYPE) +if ((NOT BUILD_TYPE STREQUAL RELEASE) AND (NOT BUILD_TYPE STREQUAL DEBUG)) + message(FATAL_ERROR "Only Release or Debug is supported, got `${CMAKE_BUILD_TYPE}`") +endif () + + +# setup common build flag defaults if there are no overrides +if (NOT DEFINED DEBUG_FLAGS) + # XXX switch on compiler type for the best default here, right now it's just GCC-compatible flags + set(DEBUG_FLAGS -O2 -fno-omit-frame-pointer) +endif () +if (NOT DEFINED RELEASE_FLAGS) + # XXX switch on compiler type for the best default here, right now it's just GCC-compatible flags + set(RELEASE_FLAGS -O3) +endif () + +message(STATUS "Common ${CMAKE_BUILD_TYPE} flags are `${${BUILD_TYPE}_FLAGS}`, set ${BUILD_TYPE}_FLAGS to override") + +macro(hint_flag FLAG DESCRIPTION) + if (NOT DEFINED ${FLAG}) + message(STATUS "${FLAG}: ${DESCRIPTION}") + else () + # i.e. `-DFOO="-a -b"` becomes CMake's semicolon separated list `FOO=`-a;-b` + separate_arguments(${FLAG}) + endif () +endmacro() + +# hint common extra flag options for all models if they are not set +hint_flag(CXX_EXTRA_FLAGS " + Appends to common compile flags, this will be appended at link phase as well. + To use separate flags at link phase, set `CXX_EXTRA_LINK_FLAGS`") +hint_flag(CXX_EXTRA_LINK_FLAGS " + Appends to link flags (i.e appended only at link phase) which appears *before* the objects. + Do not use this for linking libraries as the link line is order dependent") +hint_flag(CXX_EXTRA_LIBRARIES " + Append to link flags which appears *after* the objects, use this for linking extra libraries (e.g `-lmylib`, or simply `mylib`)") +hint_flag(CXX_EXTRA_LINKER_FLAGS " + Append to linker flags (i.e GCC's -Wl or equivalent)") + +# copy CXX_EXTRA_FLAGS <- CXX_EXTRA_LINK_FLAGS +if ((DEFINED CXX_EXTRA_FLAGS) AND (NOT DEFINED CXX_EXTRA_LINK_FLAGS)) + set(CXX_EXTRA_LINK_FLAGS ${CXX_EXTRA_FLAGS}) +endif () + +# include our macros +include(register_models.cmake) + +# register out models +register_model(OMP OMP OMPStream.cpp) +register_model(OCL OCL OCLStream.cpp) +register_model(STD STD STDStream.cpp) +register_model(STD20 STD20 STD20Stream.cpp) +register_model(HIP HIP HIPStream.cpp) +register_model(CUDA CUDA CUDAStream.cu) +register_model(KOKKOS KOKKOS KokkosStream.cpp) +register_model(SYCL SYCL SYCLStream.cpp) +register_model(ACC ACC ACCStream.cpp) +# defining RAJA collides with the RAJA namespace so USE_RAJA +register_model(RAJA USE_RAJA RAJAStream.cpp) + + +set(USAGE ON CACHE BOOL "Whether to print all custom flags for the selected model") + +message(STATUS "Available models: ${REGISTERED_MODELS}") +if (NOT DEFINED MODEL) + message(FATAL_ERROR "MODEL is unspecified, pick one from the available models") +else () + message(STATUS "Selected model : ${MODEL}") +endif () + +# load the $MODEL.cmake file and setup the correct IMPL_* based on $MODEL +load_model(${MODEL}) + +if (USAGE) + # print the usage of the registered flag options + registered_flags_action(print RESULT) + message(STATUS "${RESULT}") +endif () +# check required/set default for all registered flag options +registered_flags_action(check RESULT) +message(STATUS "${RESULT}") + +# run model specific setup, i.e append build flags, etc +setup() + +# CMake insists that -O2 (or equivalent) is the universally accepted optimisation level +# we remove that here and use our own _FLAGS +string(REGEX REPLACE "([\\/\\-]O.)" "" + CMAKE_CXX_FLAGS_${BUILD_TYPE} "${CMAKE_CXX_FLAGS_${BUILD_TYPE}}") + +message(STATUS "CXX vendor : ${CMAKE_CXX_COMPILER_ID} (${CMAKE_CXX_COMPILER})") +message(STATUS "Sources : ${IMPL_SOURCES}") +message(STATUS "Libraries : ${LINK_LIBRARIES}") +message(STATUS "CXX Flags : ${CMAKE_CXX_FLAGS_${BUILD_TYPE}} ${${BUILD_TYPE}_FLAGS} ${CXX_EXTRA_FLAGS} ") +message(STATUS "Link Flags : ${LINK_FLAGS} ${CXX_EXTRA_LINK_FLAGS}") +message(STATUS "Linker Flags: ${CMAKE_EXE_LINKER_FLAGS} ${CXX_EXTRA_LINKER_FLAGS} ") +message(STATUS "Defs : ${IMPL_DEFINITIONS}") +message(STATUS "Executable : ${EXE_NAME}") + +# below we have all the usual CMake target setup steps + +add_executable(${EXE_NAME} ${IMPL_SOURCES} main.cpp) +target_link_libraries(${EXE_NAME} PUBLIC ${LINK_LIBRARIES}) +target_compile_definitions(${EXE_NAME} PUBLIC ${IMPL_DEFINITIONS}) + +if (CXX_EXTRA_LIBRARIES) + target_link_libraries(${EXE_NAME} PUBLIC ${CXX_EXTRA_LIBRARIES}) +endif () + +target_compile_options(${EXE_NAME} PUBLIC "$<$:${RELEASE_FLAGS};${CXX_EXTRA_FLAGS}>") +target_compile_options(${EXE_NAME} PUBLIC "$<$:${DEBUG_FLAGS};${CXX_EXTRA_FLAGS}>") + +target_link_options(${EXE_NAME} PUBLIC LINKER:${CXX_EXTRA_LINKER_FLAGS}) +target_link_options(${EXE_NAME} PUBLIC ${LINK_FLAGS} ${CXX_EXTRA_LINK_FLAGS}) + +# some models require the target to be already specified so they can finish their setup here +# this only happens if the MODEL.cmake definition contains the `setup_target` macro +if (COMMAND setup_target) + setup_target(${EXE_NAME}) +endif () + + + diff --git a/CUDA.cmake b/CUDA.cmake new file mode 100644 index 0000000..0fb804f --- /dev/null +++ b/CUDA.cmake @@ -0,0 +1,30 @@ + +register_flag_optional(CMAKE_CXX_COMPILER + "Any CXX compiler that is supported by CMake detection, this is used for host compilation" + "c++") + +register_flag_optional(MEM "Device memory mode: + DEFAULT - allocate host and device memory pointers. + MANAGED - use CUDA Managed Memory. + PAGEFAULT - shared memory, only host pointers allocated." + "DEFAULT") + +register_flag_required(CMAKE_CUDA_COMPILER + "Path to the CUDA nvcc compiler") + +# XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes +register_flag_required(CUDA_ARCH + "Nvidia architecture, will be passed in via `-arch=` (e.g `sm_70`) for nvcc") + +register_flag_optional(CUDA_EXTRA_FLAGS + "Additional CUDA flags passed to nvcc, this is appended after `CUDA_ARCH`" + "") + + +macro(setup) + enable_language(CUDA) + register_definitions(MEM=${MEM}) + set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} -arch=${CUDA_ARCH} ${CUDA_EXTRA_FLAGS}) + message(STATUS "NVCC flags: ${CMAKE_CUDA_FLAGS}") +endmacro() + diff --git a/HIP.cmake b/HIP.cmake new file mode 100644 index 0000000..78150c4 --- /dev/null +++ b/HIP.cmake @@ -0,0 +1,7 @@ + +register_flag_required(CMAKE_CXX_COMPILER + "Absolute path to the AMD HIP C++ compiler") + +macro(setup) + # nothing to do here as hipcc does everything correctly, what a surprise! +endmacro() \ No newline at end of file diff --git a/KOKKOS.cmake b/KOKKOS.cmake new file mode 100644 index 0000000..90abb19 --- /dev/null +++ b/KOKKOS.cmake @@ -0,0 +1,30 @@ + +register_flag_optional(CMAKE_CXX_COMPILER + "Any CXX compiler that is supported by CMake detection and RAJA. + See https://github.com/kokkos/kokkos#primary-tested-compilers-on-x86-are" + "c++") + +register_flag_required(KOKKOS_IN_TREE + "Absolute path to the *source* distribution directory of Kokkos. + Remember to append Kokkos specific flags as well, for example: + + -DKOKKOS_IN_TREE=... -DKokkos_ENABLE_OPENMP=ON -DKokkos_ARCH_ZEN=ON ... + + See https://github.com/kokkos/kokkos/blob/master/BUILD.md for all available options") + +macro(setup) + + cmake_policy(SET CMP0074 NEW) #see https://github.com/kokkos/kokkos/blob/master/BUILD.md + + message(STATUS "Building using in-tree Kokkos source at `${KOKKOS_IN_TREE}`") + + if (EXISTS "${KOKKOS_IN_TREE}") + add_subdirectory(${KOKKOS_IN_TREE} ${CMAKE_BINARY_DIR}/kokkos) + register_link_library(Kokkos::kokkos) + else () + message(FATAL_ERROR "`${KOKKOS_IN_TREE}` does not exist") + endif () + +endmacro() + + diff --git a/OCL.cmake b/OCL.cmake new file mode 100644 index 0000000..2be3981 --- /dev/null +++ b/OCL.cmake @@ -0,0 +1,17 @@ + +register_flag_optional(CMAKE_CXX_COMPILER + "Any CXX compiler that is supported by CMake detection" + "c++") + +register_flag_optional(OpenCL_LIBRARY + "Path to OpenCL library, usually called libOpenCL.so" + "${OpenCL_LIBRARY}") + + +macro(setup) + # don't point to the CL dir as the imports already have the CL prefix + set(OpenCL_INCLUDE_DIR "${CMAKE_SOURCE_DIR}") + find_package(OpenCL REQUIRED) + register_link_library(OpenCL::OpenCL) +endmacro() + diff --git a/OMP.cmake b/OMP.cmake new file mode 100644 index 0000000..2d16b81 --- /dev/null +++ b/OMP.cmake @@ -0,0 +1,131 @@ +# Compiler ID for reference (as of CMake 3.13) +# Absoft = Absoft Fortran (absoft.com) +# ADSP = Analog VisualDSP++ (analog.com) +# AppleClang = Apple Clang (apple.com) +# ARMCC = ARM Compiler (arm.com) +# Bruce = Bruce C Compiler +# CCur = Concurrent Fortran (ccur.com) +# Clang = LLVM Clang (clang.llvm.org) +# Cray = Cray Compiler (cray.com) +# Embarcadero, Borland = Embarcadero (embarcadero.com) +# G95 = G95 Fortran (g95.org) +# GNU = GNU Compiler Collection (gcc.gnu.org) +# HP = Hewlett-Packard Compiler (hp.com) +# IAR = IAR Systems (iar.com) +# Intel = Intel Compiler (intel.com) +# MIPSpro = SGI MIPSpro (sgi.com) +# MSVC = Microsoft Visual Studio (microsoft.com) +# NVIDIA = NVIDIA CUDA Compiler (nvidia.com) +# OpenWatcom = Open Watcom (openwatcom.org) +# PGI = The Portland Group (pgroup.com) +# Flang = Flang Fortran Compiler +# PathScale = PathScale (pathscale.com) +# SDCC = Small Device C Compiler (sdcc.sourceforge.net) +# SunPro = Oracle Solaris Studio (oracle.com) +# TI = Texas Instruments (ti.com) +# TinyCC = Tiny C Compiler (tinycc.org) +# XL, VisualAge, zOS = IBM XL (ibm.com) + +# These are only added in CMake 3.15: +# ARMClang = ARM Compiler based on Clang (arm.com) +# These are only added in CMake 3.20: +# NVHPC = NVIDIA HPC SDK Compiler (nvidia.com) + + +#predefined offload flags based on compiler id +set(OMP_FLAGS_INTEL_CPU -qopt-streaming-stores=always) +set(OMP_FLAGS_OFFLOAD_INTEL -qnextgen -fiopenmp -fopenmp-targets=spir64) +set(OMP_FLAGS_OFFLOAD_GNU_NVIDIA -foffload=nvptx-none) +set(OMP_FLAGS_OFFLOAD_GNU_AMD -foffload=amdgcn-amdhsa) +set(OMP_FLAGS_OFFLOAD_CLANG_NVIDIA + -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda) +set(OMP_FLAGS_OFFLOAD_CLANG_AMD + -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa) +set(OMP_FLAGS_OFFLOAD_CLANG_ARCH_FLAG -march=) + +register_flag_optional(CMAKE_CXX_COMPILER + "Any CXX compiler that supports OpenMP as per CMake detection (and offloading if enabled with `OFFLOAD`)" + "c++") + +register_flag_optional(OFFLOAD + "Whether to use OpenMP offload, the format is |ON|OFF. + We support a small set of known offload flags for clang, gcc, and icpx. + However, as offload support is rapidly evolving, we recommend you directly supply them via OFFLOAD_FLAGS. + For example: + * OFFLOAD=NVIDIA:sm_60 + * OFFLOAD=AMD:gfx906 + * OFFLOAD=INTEL + * OFFLOAD=ON OFFLOAD_FLAGS=..." + OFF) + +register_flag_optional(OFFLOAD_FLAGS + "If OFFLOAD is enabled, this *overrides* the default offload flags" + "") + +register_flag_optional(OFFLOAD_APPEND_LINK_FLAG + "If enabled, this appends all resolved offload flags (OFFLOAD= or directly from OFFLOAD_FLAGS) to the link flags. + This is required for most offload implementations so that offload libraries can linked correctly." + ON) + + +macro(setup) + set(CMAKE_CXX_STANDARD 17) + + find_package(OpenMP REQUIRED) + register_link_library(OpenMP::OpenMP_CXX) + + string(TOUPPER ${CMAKE_CXX_COMPILER_ID} COMPILER) + + if (("${OFFLOAD}" STREQUAL OFF) OR (NOT DEFINED OFFLOAD)) + # no offload + + elseif ("${OFFLOAD}" STREQUAL ON) + # offload but with custom flags + register_definitions(OMP_TARGET_GPU) + separate_arguments(OFFLOAD_FLAGS) + set(OMP_FLAGS ${OFFLOAD_FLAGS}) + elseif ((DEFINED OFFLOAD) AND OFFLOAD_FLAGS) + # offload but OFFLOAD_FLAGS overrides + register_definitions(OMP_TARGET_GPU) + separate_arguments(OFFLOAD_FLAGS) + list(OMP_FLAGS APPEND ${OFFLOAD_FLAGS}) + else () + + # handle the vendor:arch value + string(REPLACE ":" ";" OFFLOAD_TUPLE "${OFFLOAD}") + + list(LENGTH OFFLOAD_TUPLE LEN) + if (LEN EQUAL 1) + # offload with tuple + list(GET OFFLOAD_TUPLE 0 OFFLOAD_VENDOR) + # append OMP_FLAGS_OFFLOAD_ if exists + list(APPEND OMP_FLAGS ${OMP_FLAGS_OFFLOAD_${OFFLOAD_VENDOR}}) + + elseif (LEN EQUAL 2) + # offload with tuple + list(GET OFFLOAD_TUPLE 0 OFFLOAD_VENDOR) + list(GET OFFLOAD_TUPLE 1 OFFLOAD_ARCH) + + # append OMP_FLAGS_OFFLOAD__ if exists + list(APPEND OMP_FLAGS ${OMP_FLAGS_OFFLOAD_${COMPILER}_${OFFLOAD_VENDOR}}) + # append offload arch if OMP_FLAGS_OFFLOAD__ARCH_FLAG if exists + if (DEFINED OMP_FLAGS_OFFLOAD_${COMPILER}_ARCH_FLAG) + list(APPEND OMP_FLAGS + "${OMP_FLAGS_OFFLOAD_${COMPILER}_ARCH_FLAG}${OFFLOAD_ARCH}") + endif () + else () + message(FATAL_ERROR "Unrecognised OFFLOAD format: `${OFFLOAD}`, consider directly using OFFLOAD_FLAGS") + endif () + + endif () + + + message(STATUS "OMP CXX flags : ${OMP_FLAGS}") + message(STATUS "OMP Link flags : ${OMP_LINK_FLAGS}") + # propagate flags to linker so that it links with the offload stuff as well + register_append_cxx_flags(ANY ${OMP_FLAGS}) + if (OFFLOAD_APPEND_LINK_FLAG) + register_append_link_flags(${OMP_FLAGS}) + endif () +endmacro() + diff --git a/RAJA.cmake b/RAJA.cmake new file mode 100644 index 0000000..876896e --- /dev/null +++ b/RAJA.cmake @@ -0,0 +1,84 @@ + +register_flag_optional(CMAKE_CXX_COMPILER + "Any CXX compiler that is supported by CMake detection and RAJA. + See https://raja.readthedocs.io/en/main/getting_started.html#build-and-install" + "c++") + +register_flag_required(RAJA_IN_TREE + "Absolute path to the *source* distribution directory of RAJA. + Make sure to use the release version of RAJA or clone RAJA recursively with submodules. + Remember to append RAJA specific flags as well, for example: + + -DRAJA_IN_TREE=... -DENABLE_OPENMP=ON -DENABLE_CUDA=ON ... + + See https://github.com/LLNL/RAJA/blob/08cbbafd2d21589ebf341f7275c229412d0fe903/CMakeLists.txt#L44 for all available options +") + +register_flag_optional(TARGET + "Target offload device, implemented values are CPU, NVIDIA" + CPU) + +register_flag_optional(CUDA_TOOLKIT_ROOT_DIR + "[TARGET==NVIDIA only] Path to the CUDA toolkit directory (e.g `/opt/cuda-11.2`) if the ENABLE_CUDA flag is specified for RAJA" "") + +# XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes +register_flag_optional(CUDA_ARCH + "[TARGET==NVIDIA only] Nvidia architecture, will be passed in via `-arch=` (e.g `sm_70`) for nvcc" + "") + +register_flag_optional(CUDA_EXTRA_FLAGS + "[TARGET==NVIDIA only] Additional CUDA flags passed to nvcc, this is appended after `CUDA_ARCH`" + "") + + +macro(setup) + + + if (${TARGET} STREQUAL "CPU") + register_definitions(RAJA_TARGET_CPU) + else () + register_definitions(RAJA_TARGET_GPU) + endif () + + + if (EXISTS "${RAJA_IN_TREE}") + + message(STATUS "Building using in-tree RAJA source at `${RAJA_IN_TREE}`") + + set(CMAKE_CXX_STANDARD 14) + # don't build anything that isn't the RAJA library itself, by default their cmake def builds everything, whyyy? + set(ENABLE_TESTS OFF CACHE BOOL "") + set(ENABLE_EXAMPLES OFF CACHE BOOL "") + set(ENABLE_REPRODUCERS OFF CACHE BOOL "") + set(ENABLE_EXERCISES OFF CACHE BOOL "") + set(ENABLE_DOCUMENTATION OFF CACHE BOOL "") + set(ENABLE_BENCHMARKS OFF CACHE BOOL "") + set(ENABLE_CUDA ${ENABLE_CUDA} CACHE BOOL "" FORCE) + + if (ENABLE_CUDA) + # RAJA needs all the cuda stuff setup before including! + set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc) + set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "-extended-lambda -arch=${CUDA_ARCH}" ${CUDA_EXTRA_FLAGS}) + list(APPEND CMAKE_CUDA_FLAGS) + + message(STATUS "NVCC flags: ${CMAKE_CUDA_FLAGS}") + endif () + + add_subdirectory(${RAJA_IN_TREE} ${CMAKE_BINARY_DIR}/raja) + register_link_library(RAJA) + # RAJA's cmake screws with where the binary will end up, resetting it here: + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + else () + message(FATAL_ERROR "`${RAJA_IN_TREE}` does not exist") + endif () + + + if (ENABLE_CUDA) + # RAJA needs the codebase to be compiled with nvcc, so we tell cmake to treat sources as *.cu + enable_language(CUDA) + set_source_files_properties(RAJAStream.cpp PROPERTIES LANGUAGE CUDA) + set_source_files_properties(main.cpp PROPERTIES LANGUAGE CUDA) + endif () + + +endmacro() diff --git a/RAJAStream.cpp b/RAJAStream.cpp index 2d24f2a..44db5ed 100644 --- a/RAJAStream.cpp +++ b/RAJAStream.cpp @@ -5,10 +5,10 @@ // For full license terms please see the LICENSE file distributed with this // source code +#include #include "RAJAStream.hpp" using RAJA::forall; -using RAJA::RangeSegment; #ifndef ALIGNMENT #define ALIGNMENT (2*1024*1024) // 2MB @@ -16,10 +16,8 @@ using RAJA::RangeSegment; template RAJAStream::RAJAStream(const int ARRAY_SIZE, const int device_index) - : array_size(ARRAY_SIZE) + : array_size(ARRAY_SIZE), range(0, ARRAY_SIZE) { - RangeSegment seg(0, ARRAY_SIZE); - index_set.push_back(seg); #ifdef RAJA_TARGET_CPU d_a = (T*)aligned_alloc(ALIGNMENT, sizeof(T)*array_size); @@ -53,7 +51,7 @@ void RAJAStream::init_arrays(T initA, T initB, T initC) T* RAJA_RESTRICT a = d_a; T* RAJA_RESTRICT b = d_b; T* RAJA_RESTRICT c = d_c; - forall(index_set, [=] RAJA_DEVICE (RAJA::Index_type index) + forall(range, [=] RAJA_DEVICE (RAJA::Index_type index) { a[index] = initA; b[index] = initB; @@ -75,7 +73,7 @@ void RAJAStream::copy() { T* RAJA_RESTRICT a = d_a; T* RAJA_RESTRICT c = d_c; - forall(index_set, [=] RAJA_DEVICE (RAJA::Index_type index) + forall(range, [=] RAJA_DEVICE (RAJA::Index_type index) { c[index] = a[index]; }); @@ -87,7 +85,7 @@ void RAJAStream::mul() T* RAJA_RESTRICT b = d_b; T* RAJA_RESTRICT c = d_c; const T scalar = startScalar; - forall(index_set, [=] RAJA_DEVICE (RAJA::Index_type index) + forall(range, [=] RAJA_DEVICE (RAJA::Index_type index) { b[index] = scalar*c[index]; }); @@ -99,7 +97,7 @@ void RAJAStream::add() T* RAJA_RESTRICT a = d_a; T* RAJA_RESTRICT b = d_b; T* RAJA_RESTRICT c = d_c; - forall(index_set, [=] RAJA_DEVICE (RAJA::Index_type index) + forall(range, [=] RAJA_DEVICE (RAJA::Index_type index) { c[index] = a[index] + b[index]; }); @@ -112,12 +110,20 @@ void RAJAStream::triad() T* RAJA_RESTRICT b = d_b; T* RAJA_RESTRICT c = d_c; const T scalar = startScalar; - forall(index_set, [=] RAJA_DEVICE (RAJA::Index_type index) + forall(range, [=] RAJA_DEVICE (RAJA::Index_type index) { a[index] = b[index] + scalar*c[index]; }); } +template +void RAJAStream::nstream() +{ + // TODO implement me! + std::cerr << "Not implemented yet!" << std::endl; + throw std::runtime_error("Not implemented yet!"); +} + template T RAJAStream::dot() { @@ -126,7 +132,7 @@ T RAJAStream::dot() RAJA::ReduceSum sum(0.0); - forall(index_set, [=] RAJA_DEVICE (RAJA::Index_type index) + forall(range, [=] RAJA_DEVICE (RAJA::Index_type index) { sum += a[index] * b[index]; }); diff --git a/RAJAStream.hpp b/RAJAStream.hpp index bfcc70f..a716726 100644 --- a/RAJAStream.hpp +++ b/RAJAStream.hpp @@ -8,34 +8,41 @@ #include #include -#include "RAJA/RAJA.hxx" +#include "RAJA/RAJA.hpp" #include "Stream.h" #define IMPLEMENTATION_STRING "RAJA" #ifdef RAJA_TARGET_CPU -typedef RAJA::IndexSet::ExecPolicy< - RAJA::seq_segit, - RAJA::omp_parallel_for_exec> policy; +// TODO verify old and new templates are semantically equal +//typedef RAJA::ExecPolicy< +// RAJA::seq_segit, +// RAJA::omp_parallel_for_exec> policy; + +typedef RAJA::omp_parallel_for_exec policy; typedef RAJA::omp_reduce reduce_policy; #else const size_t block_size = 128; -typedef RAJA::IndexSet::ExecPolicy< - RAJA::seq_segit, - RAJA::cuda_exec> policy; -typedef RAJA::cuda_reduce reduce_policy; +// TODO verify old and new templates are semantically equal +//typedef RAJA::IndexSet::ExecPolicy< +// RAJA::seq_segit, +// RAJA::cuda_exec> policy; +//typedef RAJA::cuda_reduce reduce_policy; +typedef RAJA::cuda_exec policy; +typedef RAJA::cuda_reduce reduce_policy; #endif +using RAJA::RangeSegment; + + template class RAJAStream : public Stream { protected: // Size of arrays - int array_size; - - // Contains iteration space - RAJA::IndexSet index_set; + const int array_size; + const RangeSegment range; // Device side pointers to arrays T* d_a; @@ -51,7 +58,8 @@ class RAJAStream : public Stream virtual void add() override; virtual void mul() override; virtual void triad() override; - virtual T dot() override; + virtual void nstream() override; + virtual T dot() override; virtual void init_arrays(T initA, T initB, T initC) override; virtual void read_arrays( diff --git a/README.md b/README.md index 25ba9ae..de8401e 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,49 @@ Usage Drivers, compiler and software applicable to whichever implementation you would like to build against is required. +### CMake + +The project supports building with CMake >= 3.13.0. +As with any CMake project, first configure the project: + +```shell +> cd babelstream +> cmake -Bbuild -H. -DMODEL= # configure the build, build type defaults to Release +> cmake --build build # compile it +> ./build/babelstream # executable available at ./build/ +``` + +To find out what flag each model supports or requires, simply configure while only specifying the model. +For example: +```shell +> cd babelstream +> cmake -Bbuild -H. -DMODEL=OCL +... +- Common Release flags are `-O3`, set RELEASE_FLAGS to override +-- CXX_EXTRA_FLAGS: + Appends to common compile flags, this will be appended at link phase at well. + To use separate flags at link time, set `CXX_EXTRA_LINKER_FLAGS` +-- CXX_EXTRA_LINK_FLAGS: + Appends to link flags (i.e appended only at link phase) which appears *before* the objects. + Do not use this for linking libraries as the link line is order dependent +-- CXX_EXTRA_LIBRARIES: + Append to link flags which appears *after* the objects, use this for linking extra libraries (e.g `-lmylib`, or simply `mylib`) +-- CXX_EXTRA_LINKER_FLAGS: + Append to linker flags (i.e GCC's -Wl or equivalent) +-- Available models: OMP;OCL;STD;STD20;HIP;CUDA;KOKKOS;SYCL;ACC;RAJA +-- Selected model : OCL +-- Supported flags: + + CMAKE_CXX_COMPILER (optional, default=c++): Any CXX compiler that is supported by CMake detection + OpenCL_LIBRARY (optional, default=): Path to OpenCL library, usually called libOpenCL.so +... +``` +Alternatively, refer to the [CI script](./ci-test-compile.sh) which test-compiles most of the models and see which flags they use. + +*It is recommended that you delete the `build` directory when you any of the build flags.* + +### GNU Make + We have supplied a series of Makefiles, one for each programming model, to assist with building. The Makefiles contain common build options, and should be simple to customise for your needs too. @@ -68,8 +111,7 @@ Pass in extra flags via the `EXTRA_FLAGS` option. The binaries are named in the form `-stream`. -Building Kokkos ---------------- +#### Building Kokkos for Make Kokkos version >= 3 requires setting the `KOKKOS_PATH` flag to the *source* directory of a distribution. For example: @@ -83,8 +125,7 @@ make -f Kokkos.make KOKKOS_PATH=~/kokkos-3.1.01 ``` See make output for more information on supported flags. -Building RAJA -------------- +#### Building RAJA for Make We use the following command to build RAJA using the Intel Compiler. ``` diff --git a/STD.cmake b/STD.cmake new file mode 100644 index 0000000..ef69f30 --- /dev/null +++ b/STD.cmake @@ -0,0 +1,33 @@ + +register_flag_optional(CMAKE_CXX_COMPILER + "Any CXX compiler that is supported by CMake detection" + "c++") + +register_flag_optional(NVHPC_OFFLOAD + "Enable offloading support (via the non-standard `-stdpar`) for the new NVHPC SDK. + The values are Nvidia architectures in ccXY format will be passed in via `-gpu=` (e.g `cc70`) + + Possible values are: + cc35 - Compile for compute capability 3.5 + cc50 - Compile for compute capability 5.0 + cc60 - Compile for compute capability 6.0 + cc62 - Compile for compute capability 6.2 + cc70 - Compile for compute capability 7.0 + cc72 - Compile for compute capability 7.2 + cc75 - Compile for compute capability 7.5 + cc80 - Compile for compute capability 8.0 + ccall - Compile for all supported compute capabilities" + "") + +macro(setup) + set(CMAKE_CXX_STANDARD 17) + + if (NVHPC_OFFLOAD) + set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD}) + # propagate flags to linker so that it links with the gpu stuff as well + register_append_cxx_flags(ANY ${NVHPC_FLAGS}) + register_append_link_flags(${NVHPC_FLAGS}) + endif () + + +endmacro() diff --git a/STD20.cmake b/STD20.cmake new file mode 100644 index 0000000..46a0c61 --- /dev/null +++ b/STD20.cmake @@ -0,0 +1,15 @@ + +register_flag_optional(CMAKE_CXX_COMPILER + "Any CXX compiler that is supported by CMake detection and supports C++20 Ranges" + "c++") + +macro(setup) + + # TODO this needs to eventually be removed when CMake adds proper C++20 support or at least update the flag used here + + # C++ 2a is too new, disable CMake's std flags completely: + set(CMAKE_CXX_EXTENSIONS OFF) + set(CMAKE_CXX_STANDARD_REQUIRED OFF) + # and append our own: + register_append_cxx_flags(ANY -std=c++2a) +endmacro() diff --git a/STDStream.h b/STDStream.h index 9ff7800..2249812 100644 --- a/STDStream.h +++ b/STDStream.h @@ -24,7 +24,7 @@ class STDStream : public Stream T *c; public: - STDStream(const int, int); + STDStream(const int, int) noexcept; ~STDStream(); virtual void copy() override; diff --git a/SYCL.cmake b/SYCL.cmake new file mode 100644 index 0000000..1ff6596 --- /dev/null +++ b/SYCL.cmake @@ -0,0 +1,85 @@ + +register_flag_optional(CMAKE_CXX_COMPILER + "Any CXX compiler that is supported by CMake detection, this is used for host compilation when required by the SYCL compiler" + "c++") + +register_flag_required(SYCL_COMPILER + "Compile using the specified SYCL compiler implementation + Supported values are + ONEAPI-DPCPP - dpc++ that is part of an oneAPI Base Toolkit distribution (https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit.html) + DPCPP - dpc++ as a standalone compiler (https://github.com/intel/llvm) + HIPSYCL - hipSYCL compiler (https://github.com/illuhad/hipSYCL) + COMPUTECPP - ComputeCpp compiler (https://developer.codeplay.com/products/computecpp/ce/home)") + +register_flag_optional(SYCL_COMPILER_DIR + "Absolute path to the selected SYCL compiler directory, most are packaged differently so set the path according to `SYCL_COMPILER`: + ONEAPI-DPCPP - not required but `dpcpp` must be on PATH, load oneAPI as per documentation (i.e `source /opt/intel/oneapi/setvars.sh` first) + HIPSYCL|DPCPP|COMPUTECPP - set to the root of the binary distribution that contains at least `bin/`, `include/`, and `lib/`" + "") + +register_flag_optional(OpenCL_LIBRARY + "[ComputeCpp only] Path to OpenCL library, usually called libOpenCL.so" + "${OpenCL_LIBRARY}") + +macro(setup) + set(CMAKE_CXX_STANDARD 17) + + + if (${SYCL_COMPILER} STREQUAL "HIPSYCL") + + + set(hipSYCL_DIR ${SYCL_COMPILER_DIR}/lib/cmake/hipSYCL) + + if (NOT EXISTS "${hipSYCL_DIR}") + message(WARNING "Falling back to hipSYCL < 0.9.0 CMake structure") + set(hipSYCL_DIR ${SYCL_COMPILER_DIR}/lib/cmake) + endif () + if (NOT EXISTS "${hipSYCL_DIR}") + message(FATAL_ERROR "Can't find the appropriate CMake definitions for hipSYCL") + endif () + + # register_definitions(_GLIBCXX_USE_CXX11_ABI=0) + find_package(hipSYCL CONFIG REQUIRED) + message(STATUS "ok") + + elseif (${SYCL_COMPILER} STREQUAL "COMPUTECPP") + + list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules) + set(ComputeCpp_DIR ${SYCL_COMPILER_DIR}) + set(COMPUTECPP_USER_FLAGS -O3 -no-serial-memop) + + # don't point to the CL dir as the imports already have the CL prefix + set(OpenCL_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/CL") + + register_definitions(CL_TARGET_OPENCL_VERSION=220 _GLIBCXX_USE_CXX11_ABI=0) + # ComputeCpp needs OpenCL + find_package(ComputeCpp REQUIRED) + + elseif (${SYCL_COMPILER} STREQUAL "DPCPP") + set(CMAKE_CXX_COMPILER ${SYCL_COMPILER_DIR}/bin/clang++) + include_directories(${SYCL_COMPILER_DIR}/include/sycl) + register_definitions(CL_TARGET_OPENCL_VERSION=220) + register_append_cxx_flags(ANY -fsycl) + register_append_link_flags(-fsycl) + elseif (${SYCL_COMPILER} STREQUAL "ONEAPI-DPCPP") + set(CMAKE_CXX_COMPILER dpcpp) + register_definitions(CL_TARGET_OPENCL_VERSION=220) + else () + message(FATAL_ERROR "SYCL_COMPILER=${SYCL_COMPILER} is unsupported") + endif () + +endmacro() + + +macro(setup_target NAME) + if ( + (${SYCL_COMPILER} STREQUAL "COMPUTECPP") OR + (${SYCL_COMPILER} STREQUAL "HIPSYCL")) + # so ComputeCpp and hipSYCL has this weird (and bad) CMake usage where they append their + # own custom integration header flags AFTER the target has been specified + # hence this macro here + add_sycl_to_target( + TARGET ${NAME} + SOURCES ${IMPL_SOURCES}) + endif () +endmacro() diff --git a/ci-prepare-bionic.sh b/ci-prepare-bionic.sh new file mode 100755 index 0000000..31b76ee --- /dev/null +++ b/ci-prepare-bionic.sh @@ -0,0 +1,335 @@ +#!/usr/bin/env bash + +set -eu + +WORK_DIR="${1:-.}" +MODE="${2:-SETUP}" +PARALLEL="${3:-false}" + +FORCE_DOWNLOAD=false + +if [ "$_" = "$0" ]; then + echo "This script must be sourced for the exports to work!" + exit 1 +fi + +case "$MODE" in +SETUP) + SETUP=true + echo "Preparing env with setup..." + ;; +VARS) + SETUP=false + echo "Preparing env without setup..." + ;; +*) + echo "Bad option" + echo "$0 " + exit 1 + ;; +esac + +mkdir -p "$WORK_DIR" +PREV_DIR="$PWD" +cd "$WORK_DIR" || exit 1 + +export_var() { + export "$1"="$2" + # see + # https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#setting-an-environment-variable + if [ "${GITHUB_ACTIONS:-false}" = true ]; then + echo "$1=$2" >>"$GITHUB_ENV" + fi +} + +check_size() { + if [ "$SETUP" = true ]; then + echo "Used space for $PWD" + du -sh . + df -h . + fi +} + +get_and_install_deb() { + + local name="$1" + local install_dir="$2" + local pkg_url="$3" + shift + local wget_args=("$@") + + local pkg_name="$name.deb" + + if [ "$SETUP" = true ]; then + if [ ! -f "$pkg_name" ] || [ "$FORCE_DOWNLOAD" = true ]; then + echo "$pkg_name not found, downloading" + rm -f "$pkg_name" + # shellcheck disable=SC2086 + wget -q --show-progress --progress=bar:force:noscroll "${wget_args[@]}" "$pkg_url" -O "$pkg_name" + fi + # rm -rf "$install_dir" + echo "Preparing to install $pkg_name locally to $install_dir ..." + dpkg-deb -x "$pkg_name" "$install_dir" + echo "$pkg_name installed, deleting $pkg_name ..." + rm -f "$pkg_name" # delete for space + fi + +} + +get_and_untar() { + local name="$1" + local pkg_url="$2" + if [ "$SETUP" = true ]; then + if [ ! -f "$name" ] || [ "$FORCE_DOWNLOAD" = true ]; then + echo "$name not found, downloading..." + wget -q --show-progress --progress=bar:force:noscroll "$pkg_url" -O "$name" + fi + echo "Preparing to extract $name ..." + tar -xf "$name" + echo "$name extracted, deleting archive ..." + rm -f "$name" # delete for space + fi +} + +verify_bin_exists() { + if [ ! -f "$1" ]; then + echo "[FAIL] $1 does not exist or is not a file!" + exit 1 + else echo "[OK! ] $1"; fi +} + +verify_dir_exists() { + if [ ! -d "$1" ]; then + echo "[FAIL] $1 does not exist or is not a directory!" + exit 1 + else echo "[OK! ] $1"; fi +} + +setup_aocc() { + echo "Preparing AOCC" + + local aocc_ver="2.3.0" + local tarball="aocc-$aocc_ver.tar.xz" + # XXX it's actually XZ compressed, so it should be tar.xz + local AOCC_URL="http://developer.amd.com/wordpress/media/files/aocc-compiler-2.3.0.tar" + # local AOCC_URL="http://localhost:8000/aocc-compiler-2.3.0.tar" + + get_and_untar "$tarball" "$AOCC_URL" + export_var AOCC_CXX "$PWD/aocc-compiler-$aocc_ver/bin/clang++" + verify_bin_exists "$AOCC_CXX" + "$AOCC_CXX" --version + check_size +} + +setup_nvhpc() { + echo "Preparing Nvidia HPC SDK" + + local tarball="nvhpc.tar.gz" +# local url="http://localhost:8000/nvhpc_2021_212_Linux_x86_64_cuda_11.2.tar.gz" + local url="https://developer.download.nvidia.com/hpc-sdk/21.2/nvhpc_2021_212_Linux_x86_64_cuda_11.2.tar.gz" + + get_and_untar "$tarball" "$url" + + local sdk_dir="$PWD/nvhpc_2021_212_Linux_x86_64_cuda_11.2/install_components/Linux_x86_64/21.2" + local bin_dir="$sdk_dir/compilers/bin" + "$bin_dir/makelocalrc" "$bin_dir" -x + + export_var NVHPC_NVCXX "$bin_dir/nvc++" + export_var NVHPC_NVCC "$sdk_dir/cuda/11.2/bin/nvcc" + export_var NVHPC_CUDA_DIR "$sdk_dir/cuda/11.2" + echo "Installed CUDA versions:" + ls "$sdk_dir/cuda" + verify_bin_exists "$NVHPC_NVCXX" + verify_bin_exists "$NVHPC_NVCC" + "$NVHPC_NVCXX" --version + "$NVHPC_NVCC" --version + check_size +} + +setup_aomp() { + echo "Preparing AOMP" + local AOMP_URL="https://github.com/ROCm-Developer-Tools/aomp/releases/download/rel_11.12-0/aomp_Ubuntu1804_11.12-0_amd64.deb" + # local AOMP_URL="http://0.0.0.0:8000/aomp_Ubuntu1804_11.12-0_amd64.deb" + get_and_install_deb "aomp" "aomp" "$AOMP_URL" + + export_var AOMP_CXX "$PWD/aomp/usr/lib/aomp/bin/clang++" + verify_bin_exists "$AOMP_CXX" + "$AOMP_CXX" --version + check_size +} + +setup_oclcpu() { + echo "Preparing Intel CPU OpenCL runtime" + local tarball="oclcpuexp.tar.gz" + local url="https://github.com/intel/llvm/releases/download/2020-12/oclcpuexp-2020.11.11.0.04_rel.tar.gz" + # local url="http://localhost:8000/oclcpuexp-2020.11.11.0.04_rel.tar.gz" + get_and_untar "$tarball" "$url" + export_var OCL_LIB "$PWD/x64/libOpenCL.so" + verify_bin_exists "$OCL_LIB" + check_size +} + +setup_kokkos() { + echo "Preparing Kokkos" + local kokkos_ver="3.3.01" + local tarball="kokkos-$kokkos_ver.tar.gz" + + local url="https://github.com/kokkos/kokkos/archive/$kokkos_ver.tar.gz" + # local url="http://localhost:8000/$kokkos_ver.tar.gz" + + get_and_untar "$tarball" "$url" + export_var KOKKOS_SRC "$PWD/kokkos-$kokkos_ver" + verify_dir_exists "$KOKKOS_SRC" + check_size +} + +setup_raja() { + echo "Preparing RAJA" + local raja_ver="0.13.0" + local tarball="raja-$raja_ver.tar.gz" + + local url="https://github.com/LLNL/RAJA/releases/download/v0.13.0/RAJA-v$raja_ver.tar.gz" + # local url="http://localhost:8000/RAJA-v$raja_ver.tar.gz" + + get_and_untar "$tarball" "$url" + export_var RAJA_SRC "$PWD/RAJA-v$raja_ver" + verify_dir_exists "$RAJA_SRC" + check_size +} + +setup_clang_gcc() { + + echo "deb http://archive.ubuntu.com/ubuntu focal main universe" | sudo tee -a /etc/apt/sources.list + + sudo apt-get update -qq + sudo apt-get install -y -qq gcc-10-offload-nvptx gcc-10-offload-amdgcn libtbb2 libtbb-dev g++-10 + + export_var GCC_CXX "$(which g++-10)" + verify_bin_exists "$GCC_CXX" + "$GCC_CXX" --version + + export_var GCC_STD_PAR_LIB "tbb" + export_var GCC_OMP_OFFLOAD_AMD true + export_var GCC_OMP_OFFLOAD_NVIDIA true + + clang++ --version + export_var CLANG_CXX "$(which clang++)" + verify_bin_exists "$CLANG_CXX" + "$CLANG_CXX" --version + + export_var CLANG_STD_PAR_LIB "tbb" + export_var CLANG_OMP_OFFLOAD_AMD false + export_var CLANG_OMP_OFFLOAD_NVIDIA false + check_size + +} + +setup_rocm() { + wget -q -O - "https://repo.radeon.com/rocm/rocm.gpg.key" | sudo apt-key add - + echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/debian/ xenial main' | sudo tee /etc/apt/sources.list.d/rocm.list + sudo apt-get update -qq + sudo apt-get install -y -qq rocm-dev + # AMD needs this rocm_path thing exported... + export_var ROCM_PATH "/opt/rocm-4.0.0" + export_var HIP_CXX "$ROCM_PATH/bin/hipcc" + verify_bin_exists "$HIP_CXX" + "$HIP_CXX" --version + check_size +} + +setup_dpcpp() { + + local nightly="20210106" + local tarball="dpcpp-$nightly.tar.gz" + + local url="https://github.com/intel/llvm/releases/download/sycl-nightly/$nightly/dpcpp-compiler.tar.gz" + # local url="http://localhost:8000/dpcpp-compiler.tar.gz" + + get_and_untar "$tarball" "$url" + + export_var DPCPP_DIR "$PWD/dpcpp_compiler/" + verify_dir_exists "$DPCPP_DIR" + "$DPCPP_DIR/bin/clang++" --version + check_size +} + +setup_hipsycl() { + + sudo apt-get install -y -qq libboost-fiber-dev libboost-context-dev + local hipsycl_ver="0.9.0" + local tarball="v$hipsycl_ver.tar.gz" + local install_dir="$PWD/hipsycl_dist_$hipsycl_ver" + + local url="https://github.com/illuhad/hipSYCL/archive/v$hipsycl_ver.tar.gz" + # local url="http://localhost:8000/hipSYCL-$hipsycl_ver.tar.gz" + + get_and_untar "$tarball" "$url" + + if [ "$SETUP" = true ]; then + local src="$PWD/hipSYCL-$hipsycl_ver" + rm -rf "$src/build" + rm -rf "$install_dir" + cmake "-B$src/build" "-H$src" \ + -DCMAKE_C_COMPILER="$(which gcc-10)" \ + -DCMAKE_CXX_COMPILER="$(which g++-10)" \ + -DCMAKE_INSTALL_PREFIX="$install_dir" \ + -DWITH_ROCM_BACKEND=OFF \ + -DWITH_CUDA_BACKEND=OFF \ + -DWITH_CPU_BACKEND=ON + cmake --build "$src/build" --target install -j "$(nproc)" + fi + + export_var HIPSYCL_DIR "$install_dir" + verify_dir_exists "$HIPSYCL_DIR" + # note: this will forward --version to the default compiler so it won't say anything about hipsycl + "$HIPSYCL_DIR/bin/syclcc-clang" --version + check_size +} + +setup_computecpp() { + echo "TODO ComputeCpp requires registration+login to download" +} + +if [ "${GITHUB_ACTIONS:-false}" = true ]; then + echo "Running in GitHub Actions, defaulting to special export" + TERM=xterm + export TERM=xterm + if [ "$SETUP" = true ]; then + echo "Deleting extra packages for space in 5 seconds..." + sleep 5 + echo "Starting apt-get remove:" + sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel + sudo apt-get autoremove -y + check_size + fi +else + echo "Running locally, defaulting to standard export" +fi + + +if [ "$PARALLEL" = true ]; then + (setup_clang_gcc && setup_rocm && setup_hipsycl) & # these need apt so run sequentially + setup_oclcpu & + setup_aocc & + setup_nvhpc & + setup_aomp & + setup_dpcpp & + setup_kokkos & + setup_raja & + wait +else + setup_aocc + setup_oclcpu + setup_nvhpc + setup_aomp + setup_dpcpp + setup_kokkos + setup_raja + # these need apt + setup_clang_gcc + setup_rocm + setup_hipsycl +fi + +echo "Done!" +cd "$PREV_DIR" || exit 1 diff --git a/ci-test-compile.sh b/ci-test-compile.sh new file mode 100755 index 0000000..9a22140 --- /dev/null +++ b/ci-test-compile.sh @@ -0,0 +1,252 @@ +#!/usr/bin/env bash + +set -eu + +# prevent ccache from caching anything for system compilers +export CCACHE_DISABLE=1 + +BUILD_DIR=${1:-build} +COMPILER=${2:-all} +MODEL=${3:-all} + +LOG_DIR="$BUILD_DIR" + +mkdir -p "$LOG_DIR" + +if [ "${GITHUB_ACTIONS:-false}" = true ]; then + echo "Running in GitHub Actions, setting TERM..." + TERM=xterm + export TERM=xterm +fi + +function_exists() { + declare -f -F "$1" >/dev/null + return $? +} + +run_build() { + local key="$1" + local grep_kw="$2" + local model="$3" + local flags="$4" + + if [ "$MODEL" != "all" ] && [ "$MODEL" != "$model" ]; then + echo "Skipping -DMODEL=$model $flags" + return 0 + fi + + local log="$LOG_DIR/${model}_${key}.log" + rm -f "$log" + touch "$log" + + local build="$BUILD_DIR/${model}_${key}" + + rm -rf "$build" + set +e + + # shellcheck disable=SC2086 + cmake -B"$build" -H. \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_VERBOSE_MAKEFILE=ON \ + -DMODEL="$model" $flags &>>"$log" + local cmake_code=$? + + cmake --build "$build" --target babelstream -j "$(nproc)" &>>"$log" + local cmake_code=$? + set -e + + local bin="./$build/babelstream" + if [[ -f "$bin" ]]; then + echo "$(tput setaf 2)[PASS!]($model->$build)$(tput sgr0): -DMODEL=$model $flags" + # shellcheck disable=SC2002 + cat "$log" | sed '/^--/d' | grep -i "$grep_kw" | sed 's/^/ /' + else + echo "$(tput setaf 1)[FAIL!]($model->$build)$(tput sgr0): -DMODEL=$model $flags" + echo " CMake exited with code $cmake_code, see full build log at $log, reproduced below:" + cat "$log" + exit 1 + fi + echo " $(tput setaf 4)$(file "$bin")$(tput sgr0)" +} + +## +#KOKKOS_SRC="/home/tom/Downloads/kokkos-3.3.00" +#RAJA_SRC="/home/tom/Downloads/RAJA-v0.13.0" +# +#GCC_CXX="/usr/bin/g++" +#CLANG_CXX="/usr/bin/clang++" +# +#NVSDK="/home/tom/Downloads/nvhpc_2021_212_Linux_x86_64_cuda_11.2/install_components/Linux_x86_64/21.2/" +#NVHPC_NVCXX="$NVSDK/compilers/bin/nvc++" +#NVHPC_NVCC="$NVSDK/cuda/11.2/bin/nvcc" +#NVHPC_CUDA_DIR="$NVSDK/cuda/11.2" +#"$NVSDK/compilers/bin/makelocalrc" "$NVSDK/compilers/bin/" -x +# +##NVHPC_NVCXX="/opt/nvidia/hpc_sdk/Linux_x86_64/21.1/compilers/bin/nvc++" +##NVHPC_NVCC="/opt/nvidia/hpc_sdk/Linux_x86_64/21.1/cuda/11.2/bin/nvcc" +##NVHPC_CUDA_DIR="/opt/nvidia/hpc_sdk/Linux_x86_64/21.1/cuda/11.2" +# +#AOCC_CXX="/opt/AMD/aocc-compiler-2.3.0/bin/clang++" +#AOMP_CXX="/usr/lib/aomp/bin/clang++" +#OCL_LIB="/home/tom/Downloads/oclcpuexp-2020.11.11.0.04_rel/x64/libOpenCL.so" +# +## AMD needs this rocm_path thing exported... +#export ROCM_PATH="/opt/rocm-4.0.0" +#HIP_CXX="/opt/rocm-4.0.0/bin/hipcc" +#COMPUTECPP_DIR="/home/tom/Desktop/computecpp_archive/ComputeCpp-CE-2.3.0-x86_64-linux-gnu" +#DPCPP_DIR="/home/tom/Downloads/dpcpp_compiler" +#HIPSYCL_DIR="/opt/hipsycl/cff515c/" +# +#ICPX_CXX="/opt/intel/oneapi/compiler/2021.1.2/linux/bin/icpx" +# +#GCC_STD_PAR_LIB="" +#CLANG_STD_PAR_LIB="" +#GCC_OMP_OFFLOAD_AMD=false +#GCC_OMP_OFFLOAD_NVIDIA=true +#CLANG_OMP_OFFLOAD_AMD=false +#CLANG_OMP_OFFLOAD_NVIDIA=false + +AMD_ARCH="gfx_903" +NV_ARCH="sm_70" +NV_ARCH_CCXY="cuda11.2,cc80" + +build_gcc() { + local name="gcc_build" + local cxx="-DCMAKE_CXX_COMPILER=${GCC_CXX:?}" + + run_build $name "${GCC_CXX:?}" OMP "$cxx" + if [ "$MODEL" = "all" ] || [ "$MODEL" = "OMP" ]; then + # sanity check that it at least runs + echo "Sanity checking GCC OMP build..." + "./$BUILD_DIR/OMP_$name/babelstream" -s 1048576 -n 10 + fi + + # some distributions like Ubuntu bionic implements std par with TBB, so conditionally link it here + run_build $name "${GCC_CXX:?}" STD "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}" + run_build $name "${GCC_CXX:?}" STD20 "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}" + + if [ "${GCC_OMP_OFFLOAD_AMD:-false}" != "false" ]; then + run_build "amd_$name" "${GCC_CXX:?}" ACC "$cxx -DCXX_EXTRA_FLAGS=-foffload=amdgcn-amdhsa" + run_build "amd_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=AMD:$AMD_ARCH" + fi + + if [ "${GCC_OMP_OFFLOAD_NVIDIA:-false}" != "false" ]; then + run_build "nvidia_$name" "${GCC_CXX:?}" ACC "$cxx -DCXX_EXTRA_FLAGS=-foffload=nvptx-none" + run_build "nvidia_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH" + fi + + run_build $name "${GCC_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH" + run_build $name "${GCC_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED" + run_build $name "${GCC_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT" + # run_build $name "${CC_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_CUDA=ON" + run_build "cuda_$name" "${GCC_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" + run_build $name "${GCC_CXX:?}" OCL "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" + run_build $name "${GCC_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" + run_build "cuda_$name" "${GCC_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} \ + -DENABLE_CUDA=ON \ + -DTARGET=NVIDIA \ + -DCUDA_TOOLKIT_ROOT_DIR=${NVHPC_CUDA_DIR:?} \ + -DCUDA_ARCH=$NV_ARCH" + +} + +build_clang() { + local name="clang_build" + local cxx="-DCMAKE_CXX_COMPILER=${CLANG_CXX:?}" + run_build $name "${CLANG_CXX:?}" OMP "$cxx" + + if [ "${CLANG_OMP_OFFLOAD_AMD:-false}" != "false" ]; then + run_build "amd_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=AMD:$AMD_ARCH" + fi + + if [ "${CLANG_OMP_OFFLOAD_NVIDIA:-false}" != "false" ]; then + run_build "nvidia_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH" + fi + + run_build $name "${CLANG_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH" + run_build $name "${CLANG_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED" + run_build $name "${CLANG_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT" + run_build $name "${CLANG_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" + run_build $name "${CLANG_CXX:?}" OCL "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" + run_build $name "${CLANG_CXX:?}" STD "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" + # run_build $name "${LANG_CXX:?}" STD20 "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" # not yet supported + run_build $name "${CLANG_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" + # no clang /w RAJA+cuda because it needs nvcc which needs gcc +} + +build_nvhpc() { + local name="nvhpc_build" + local cxx="-DCMAKE_CXX_COMPILER=${NVHPC_NVCXX:?}" + run_build $name "${NVHPC_NVCXX:?}" STD "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY" + run_build $name "${NVHPC_NVCXX:?}" ACC "$cxx -DTARGET_DEVICE=gpu -DTARGET_PROCESSOR=px -DCUDA_ARCH=$NV_ARCH_CCXY" + run_build $name "${NVHPC_NVCXX:?}" ACC "$cxx -DTARGET_DEVICE=multicore -DTARGET_PROCESSOR=zen" +} + +build_aocc() { + run_build aocc_build "${AOCC_CXX:?}" OMP "-DCMAKE_CXX_COMPILER=${AOCC_CXX:?}" +} + +build_aomp() { + run_build aomp_amd_build "${AOMP_CXX:?}" OMP "-DCMAKE_CXX_COMPILER=${AOMP_CXX:?} -DOFFLOAD=AMD:gfx906" + #run_build aomp_nvidia_build "-DCMAKE_CXX_COMPILER=${AOMP_CXX:?} -DOFFLOAD=NVIDIA:$NV_ARCH" +} + +build_hip() { + run_build hip_build "${HIP_CXX:?}" HIP "-DCMAKE_CXX_COMPILER=${HIP_CXX:?}" +} + +build_icpx() { + source /opt/intel/oneapi/setvars.sh -force + run_build intel_build "${ICPX_CXX:?}" OMP "-DCMAKE_CXX_COMPILER=${ICPX_CXX:?} -DOFFLOAD=INTEL" +} + +build_computecpp() { + run_build computecpp_build "compute++" SYCL "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \ + -DSYCL_COMPILER=COMPUTECPP \ + -DSYCL_COMPILER_DIR=${COMPUTECPP_DIR:?} \ + -DOpenCL_LIBRARY=${OCL_LIB:?}" +} + +build_dpcpp() { + run_build intel_build "${DPCPP_DIR:?}" SYCL "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \ + -DSYCL_COMPILER=DPCPP \ + -DSYCL_COMPILER_DIR=${DPCPP_DIR:?}" + + # for oneAPI BaseKit: + # source /opt/intel/oneapi/setvars.sh -force + # run_build intel_build "dpcpp" SYCL "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} -DSYCL_COMPILER=ONEAPI-DPCPP" +} + +build_hipsycl() { + run_build hipsycl_build "syclcc" SYCL " + -DSYCL_COMPILER=HIPSYCL \ + -DSYCL_COMPILER_DIR=${HIPSYCL_DIR:?}" +} + +# TODO tested locally but can't install compilers for these two remotely without registration/license: +# build_icpx +# build_computecpp + +case "$COMPILER" in +gcc) build_gcc ;; +clang) build_clang ;; +nvhpc) build_nvhpc ;; +aocc) build_aocc ;; +aomp) build_aomp ;; +hip) build_hip ;; +dpcpp) build_dpcpp ;; +hipsycl) build_hipsycl ;; +all) + build_gcc + build_clang + build_nvhpc + build_aocc + build_aomp + build_hip + build_dpcpp + build_hipsycl + ;; +*) + echo "Unknown $COMPILER, use ALL to compile with all supported compilers" + ;; +esac diff --git a/cmake/Modules/ComputeCppCompilerChecks.cmake b/cmake/Modules/ComputeCppCompilerChecks.cmake new file mode 100644 index 0000000..76e9749 --- /dev/null +++ b/cmake/Modules/ComputeCppCompilerChecks.cmake @@ -0,0 +1,65 @@ +cmake_minimum_required(VERSION 3.4.3) + +if(CMAKE_COMPILER_IS_GNUCXX) + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8) + message(FATAL_ERROR "host compiler - gcc version must be > 4.8") + endif() +elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.6) + message(FATAL_ERROR "host compiler - clang version must be > 3.6") + endif() +endif() + +if(MSVC) + set(ComputeCpp_STL_CHECK_SRC __STL_check) + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp + "#include \n" + "int main() { return 0; }\n") + set(_stl_test_command ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE} + -sycl + ${COMPUTECPP_DEVICE_COMPILER_FLAGS} + -isystem ${ComputeCpp_INCLUDE_DIRS} + -isystem ${OpenCL_INCLUDE_DIRS} + -o ${ComputeCpp_STL_CHECK_SRC}.sycl + -c ${ComputeCpp_STL_CHECK_SRC}.cpp) + execute_process( + COMMAND ${_stl_test_command} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT + ERROR_QUIET + OUTPUT_QUIET) + if(NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0) + # Try disabling compiler version checks + execute_process( + COMMAND ${_stl_test_command} + -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT + ERROR_QUIET + OUTPUT_QUIET) + if(NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0) + # Try again with __CUDACC__ and _HAS_CONDITIONAL_EXPLICIT=0. This relaxes the restritions in the MSVC headers + execute_process( + COMMAND ${_stl_test_command} + -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH + -D_HAS_CONDITIONAL_EXPLICIT=0 + -D__CUDACC__ + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + RESULT_VARIABLE ComputeCpp_STL_CHECK_RESULT + ERROR_QUIET + OUTPUT_QUIET) + if(NOT ${ComputeCpp_STL_CHECK_RESULT} EQUAL 0) + message(FATAL_ERROR "compute++ cannot consume hosted STL headers. This means that compute++ can't \ + compile a simple program in this platform and will fail when used in this system.") + else() + list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH + -D_HAS_CONDITIONAL_EXPLICIT=0 + -D__CUDACC__) + endif() + else() + list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -D_ALLOW_COMPILER_AND_STL_VERSION_MISMATCH) + endif() + endif() + file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp + ${CMAKE_CURRENT_BINARY_DIR}/${ComputeCpp_STL_CHECK_SRC}.cpp.sycl) +endif(MSVC) diff --git a/cmake/Modules/ComputeCppIRMap.cmake b/cmake/Modules/ComputeCppIRMap.cmake new file mode 100644 index 0000000..942d91d --- /dev/null +++ b/cmake/Modules/ComputeCppIRMap.cmake @@ -0,0 +1,18 @@ +cmake_minimum_required(VERSION 3.4.3) + +# These should match the types of IR output by compute++ +set(IR_MAP_spir bc) +set(IR_MAP_spir64 bc) +set(IR_MAP_spir32 bc) +set(IR_MAP_spirv spv) +set(IR_MAP_spirv64 spv) +set(IR_MAP_spirv32 spv) +set(IR_MAP_aorta-x86_64 o) +set(IR_MAP_aorta-aarch64 o) +set(IR_MAP_aorta-rcar-cve o) +set(IR_MAP_custom-spir64 bc) +set(IR_MAP_custom-spir32 bc) +set(IR_MAP_custom-spirv64 spv) +set(IR_MAP_custom-spirv32 spv) +set(IR_MAP_ptx64 s) +set(IR_MAP_amdgcn s) diff --git a/cmake/Modules/FindComputeCpp.cmake b/cmake/Modules/FindComputeCpp.cmake new file mode 100644 index 0000000..3cca515 --- /dev/null +++ b/cmake/Modules/FindComputeCpp.cmake @@ -0,0 +1,454 @@ +#.rst: +# FindComputeCpp +#--------------- +# +# Copyright 2016-2018 Codeplay Software Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use these files except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +######################### +# FindComputeCpp.cmake +######################### +# +# Tools for finding and building with ComputeCpp. +# +# User must define ComputeCpp_DIR pointing to the ComputeCpp +# installation. +# +# Latest version of this file can be found at: +# https://github.com/codeplaysoftware/computecpp-sdk + +cmake_minimum_required(VERSION 3.4.3) +include(FindPackageHandleStandardArgs) +include(ComputeCppIRMap) + +set(COMPUTECPP_USER_FLAGS "" CACHE STRING "User flags for compute++") +separate_arguments(COMPUTECPP_USER_FLAGS) +mark_as_advanced(COMPUTECPP_USER_FLAGS) + +set(COMPUTECPP_BITCODE "spir64" CACHE STRING + "Bitcode type to use as SYCL target in compute++") +mark_as_advanced(COMPUTECPP_BITCODE) + +find_package(OpenCL REQUIRED) + +# Find ComputeCpp package + +if(DEFINED ComputeCpp_DIR) + set(computecpp_find_hint ${ComputeCpp_DIR}) +elseif(DEFINED ENV{COMPUTECPP_DIR}) + set(computecpp_find_hint $ENV{COMPUTECPP_DIR}) +endif() + +# Used for running executables on the host +set(computecpp_host_find_hint ${computecpp_find_hint}) + +if(CMAKE_CROSSCOMPILING) + # ComputeCpp_HOST_DIR is used to find executables that are run on the host + if(DEFINED ComputeCpp_HOST_DIR) + set(computecpp_host_find_hint ${ComputeCpp_HOST_DIR}) + elseif(DEFINED ENV{COMPUTECPP_HOST_DIR}) + set(computecpp_host_find_hint $ENV{COMPUTECPP_HOST_DIR}) + endif() +endif() + +find_program(ComputeCpp_DEVICE_COMPILER_EXECUTABLE compute++ + HINTS ${computecpp_host_find_hint} + PATH_SUFFIXES bin + NO_SYSTEM_ENVIRONMENT_PATH) + +find_program(ComputeCpp_INFO_EXECUTABLE computecpp_info + HINTS ${computecpp_host_find_hint} + PATH_SUFFIXES bin + NO_SYSTEM_ENVIRONMENT_PATH) + +find_library(COMPUTECPP_RUNTIME_LIBRARY + NAMES ComputeCpp ComputeCpp_vs2015 + HINTS ${computecpp_find_hint} + PATH_SUFFIXES lib + DOC "ComputeCpp Runtime Library") + +find_library(COMPUTECPP_RUNTIME_LIBRARY_DEBUG + NAMES ComputeCpp_d ComputeCpp ComputeCpp_vs2015_d + HINTS ${computecpp_find_hint} + PATH_SUFFIXES lib + DOC "ComputeCpp Debug Runtime Library") + +find_path(ComputeCpp_INCLUDE_DIRS + NAMES "CL/sycl.hpp" + HINTS ${computecpp_find_hint}/include + DOC "The ComputeCpp include directory") +get_filename_component(ComputeCpp_INCLUDE_DIRS ${ComputeCpp_INCLUDE_DIRS} ABSOLUTE) + +get_filename_component(computecpp_canonical_root_dir "${ComputeCpp_INCLUDE_DIRS}/.." ABSOLUTE) +set(ComputeCpp_ROOT_DIR "${computecpp_canonical_root_dir}" CACHE PATH + "The root of the ComputeCpp install") + +if(NOT ComputeCpp_INFO_EXECUTABLE) + message(WARNING "Can't find computecpp_info - check ComputeCpp_DIR") +else() + execute_process(COMMAND ${ComputeCpp_INFO_EXECUTABLE} "--dump-version" + OUTPUT_VARIABLE ComputeCpp_VERSION + RESULT_VARIABLE ComputeCpp_INFO_EXECUTABLE_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) + if(NOT ComputeCpp_INFO_EXECUTABLE_RESULT EQUAL "0") + message(WARNING "Package version - Error obtaining version!") + endif() + + execute_process(COMMAND ${ComputeCpp_INFO_EXECUTABLE} "--dump-is-supported" + OUTPUT_VARIABLE COMPUTECPP_PLATFORM_IS_SUPPORTED + RESULT_VARIABLE ComputeCpp_INFO_EXECUTABLE_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) + if(NOT ComputeCpp_INFO_EXECUTABLE_RESULT EQUAL "0") + message(WARNING "platform - Error checking platform support!") + else() + mark_as_advanced(COMPUTECPP_PLATFORM_IS_SUPPORTED) + if (COMPUTECPP_PLATFORM_IS_SUPPORTED) + message(STATUS "platform - your system can support ComputeCpp") + else() + message(STATUS "platform - your system is not officially supported") + endif() + endif() +endif() + +find_package_handle_standard_args(ComputeCpp + REQUIRED_VARS ComputeCpp_ROOT_DIR + ComputeCpp_DEVICE_COMPILER_EXECUTABLE + ComputeCpp_INFO_EXECUTABLE + COMPUTECPP_RUNTIME_LIBRARY + COMPUTECPP_RUNTIME_LIBRARY_DEBUG + ComputeCpp_INCLUDE_DIRS + VERSION_VAR ComputeCpp_VERSION) +mark_as_advanced(ComputeCpp_ROOT_DIR + ComputeCpp_DEVICE_COMPILER_EXECUTABLE + ComputeCpp_INFO_EXECUTABLE + COMPUTECPP_RUNTIME_LIBRARY + COMPUTECPP_RUNTIME_LIBRARY_DEBUG + ComputeCpp_INCLUDE_DIRS + ComputeCpp_VERSION) + +if(NOT ComputeCpp_FOUND) + return() +endif() + +list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -O2 -mllvm -inline-threshold=1000 -intelspirmetadata) +mark_as_advanced(COMPUTECPP_DEVICE_COMPILER_FLAGS) + +if(CMAKE_CROSSCOMPILING) + if(NOT COMPUTECPP_DONT_USE_TOOLCHAIN) + list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS --gcc-toolchain=${COMPUTECPP_TOOLCHAIN_DIR}) + endif() + list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS --sysroot=${COMPUTECPP_SYSROOT_DIR}) + list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -target ${COMPUTECPP_TARGET_TRIPLE}) +endif() + +list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS -sycl-target ${COMPUTECPP_BITCODE}) +message(STATUS "compute++ flags - ${COMPUTECPP_DEVICE_COMPILER_FLAGS}") + +include(ComputeCppCompilerChecks) + +if(NOT TARGET OpenCL::OpenCL) + add_library(OpenCL::OpenCL UNKNOWN IMPORTED) + set_target_properties(OpenCL::OpenCL PROPERTIES + IMPORTED_LOCATION "${OpenCL_LIBRARIES}" + INTERFACE_INCLUDE_DIRECTORIES "${OpenCL_INCLUDE_DIRS}" + ) +endif() + +if(NOT TARGET ComputeCpp::ComputeCpp) + add_library(ComputeCpp::ComputeCpp UNKNOWN IMPORTED) + set_target_properties(ComputeCpp::ComputeCpp PROPERTIES + IMPORTED_LOCATION_DEBUG "${COMPUTECPP_RUNTIME_LIBRARY_DEBUG}" + IMPORTED_LOCATION_RELWITHDEBINFO "${COMPUTECPP_RUNTIME_LIBRARY}" + IMPORTED_LOCATION "${COMPUTECPP_RUNTIME_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${ComputeCpp_INCLUDE_DIRS}" + INTERFACE_LINK_LIBRARIES "OpenCL::OpenCL" + ) +endif() + +# This property allows targets to specify that their sources should be +# compiled with the integration header included after the user's +# sources, not before (e.g. when an enum is used in a kernel name, this +# is not technically valid SYCL code but can work with ComputeCpp) +define_property( + TARGET PROPERTY COMPUTECPP_INCLUDE_AFTER + BRIEF_DOCS "Include integration header after user source" + FULL_DOCS "Changes compiler arguments such that the source file is + actually the integration header, and the .cpp file is included on + the command line so that it is seen by the compiler first. Enables + non-standards-conformant SYCL code to compile with ComputeCpp." +) +define_property( + TARGET PROPERTY INTERFACE_COMPUTECPP_FLAGS + BRIEF_DOCS "Interface compile flags to provide compute++" + FULL_DOCS "Set additional compile flags to pass to compute++ when compiling + any target which links to this one." +) +define_property( + SOURCE PROPERTY COMPUTECPP_SOURCE_FLAGS + BRIEF_DOCS "Source file compile flags for compute++" + FULL_DOCS "Set additional compile flags for compiling the SYCL integration + header for the given source file." +) + +#################### +# __build_ir +#################### +# +# Adds a custom target for running compute++ and adding a dependency for the +# resulting integration header and kernel binary. +# +# TARGET : Name of the target. +# SOURCE : Source file to be compiled. +# COUNTER : Counter included in name of custom target. Different counter +# values prevent duplicated names of custom target when source files with +# the same name, but located in different directories, are used for the +# same target. +# +function(__build_ir) + set(options) + set(one_value_args + TARGET + SOURCE + COUNTER + ) + set(multi_value_args) + cmake_parse_arguments(SDK_BUILD_IR + "${options}" + "${one_value_args}" + "${multi_value_args}" + ${ARGN} + ) + get_filename_component(sourceFileName ${SDK_BUILD_IR_SOURCE} NAME) + + # Set the path to the integration header. + # The .sycl filename must depend on the target so that different targets + # using the same source file will be generated with a different rule. + set(baseSyclName ${CMAKE_CURRENT_BINARY_DIR}/${SDK_BUILD_IR_TARGET}_${sourceFileName}) + set(outputSyclFile ${baseSyclName}.sycl) + set(outputDeviceFile ${baseSyclName}.${IR_MAP_${COMPUTECPP_BITCODE}}) + set(depFileName ${baseSyclName}.sycl.d) + + set(include_directories "$") + set(compile_definitions "$") + set(generated_include_directories + $<$:-I\"$\">) + set(generated_compile_definitions + $<$:-D$>) + + # Obtain language standard of the file + set(device_compiler_cxx_standard) + get_target_property(targetCxxStandard ${SDK_BUILD_IR_TARGET} CXX_STANDARD) + if (targetCxxStandard MATCHES 17) + set(device_compiler_cxx_standard "-std=c++1z") + elseif (targetCxxStandard MATCHES 14) + set(device_compiler_cxx_standard "-std=c++14") + elseif (targetCxxStandard MATCHES 11) + set(device_compiler_cxx_standard "-std=c++11") + elseif (targetCxxStandard MATCHES 98) + message(FATAL_ERROR "SYCL applications cannot be compiled using C++98") + else () + set(device_compiler_cxx_standard "") + endif() + + get_property(source_compile_flags + SOURCE ${SDK_BUILD_IR_SOURCE} + PROPERTY COMPUTECPP_SOURCE_FLAGS + ) + separate_arguments(source_compile_flags) + if(source_compile_flags) + list(APPEND computecpp_source_flags ${source_compile_flags}) + endif() + + list(APPEND COMPUTECPP_DEVICE_COMPILER_FLAGS + ${device_compiler_cxx_standard} + ${COMPUTECPP_USER_FLAGS} + ${computecpp_source_flags} + ) + + set(ir_dependencies ${SDK_BUILD_IR_SOURCE}) + get_target_property(target_libraries ${SDK_BUILD_IR_TARGET} LINK_LIBRARIES) + if(target_libraries) + foreach(library ${target_libraries}) + if(TARGET ${library}) + list(APPEND ir_dependencies ${library}) + endif() + endforeach() + endif() + + # Depfile support was only added in CMake 3.7 + # CMake throws an error if it is unsupported by the generator (i. e. not ninja) + if((NOT CMAKE_VERSION VERSION_LESS 3.7.0) AND + CMAKE_GENERATOR MATCHES "Ninja") + file(RELATIVE_PATH relOutputFile ${CMAKE_BINARY_DIR} ${outputDeviceFile}) + set(generate_depfile -MMD -MF ${depFileName} -MT ${relOutputFile}) + set(enable_depfile DEPFILE ${depFileName}) + endif() + + # Add custom command for running compute++ + add_custom_command( + OUTPUT ${outputDeviceFile} ${outputSyclFile} + COMMAND ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE} + ${COMPUTECPP_DEVICE_COMPILER_FLAGS} + ${generated_include_directories} + ${generated_compile_definitions} + -sycl-ih ${outputSyclFile} + -o ${outputDeviceFile} + -c ${SDK_BUILD_IR_SOURCE} + ${generate_depfile} + DEPENDS ${ir_dependencies} + IMPLICIT_DEPENDS CXX ${SDK_BUILD_IR_SOURCE} + ${enable_depfile} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Building ComputeCpp integration header file ${outputSyclFile}") + + # Name: (user-defined name)_(source file)_(counter)_ih + set(headerTargetName + ${SDK_BUILD_IR_TARGET}_${sourceFileName}_${SDK_BUILD_IR_COUNTER}_ih) + + if(NOT MSVC) + # Add a custom target for the generated integration header + add_custom_target(${headerTargetName} DEPENDS ${outputDeviceFile} ${outputSyclFile}) + add_dependencies(${SDK_BUILD_IR_TARGET} ${headerTargetName}) + endif() + + # This property can be set on a per-target basis to indicate that the + # integration header should appear after the main source listing + get_target_property(includeAfter ${SDK_ADD_SYCL_TARGET} COMPUTECPP_INCLUDE_AFTER) + + if(includeAfter) + # Change the source file to the integration header - e.g. + # g++ -c source_file_name.cpp.sycl + get_target_property(current_sources ${SDK_BUILD_IR_TARGET} SOURCES) + # Remove absolute path to source file + list(REMOVE_ITEM current_sources ${SDK_BUILD_IR_SOURCE}) + # Remove relative path to source file + string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "" + rel_source_file ${SDK_BUILD_IR_SOURCE} + ) + list(REMOVE_ITEM current_sources ${rel_source_file}) + # Add SYCL header to source list + list(APPEND current_sources ${outputSyclFile}) + set_property(TARGET ${SDK_BUILD_IR_TARGET} + PROPERTY SOURCES ${current_sources}) + # CMake/gcc don't know what language a .sycl file is, so tell them + set_property(SOURCE ${outputSyclFile} PROPERTY LANGUAGE CXX) + set(includedFile ${SDK_BUILD_IR_SOURCE}) + set(cppFile ${outputSyclFile}) + else() + set_property(SOURCE ${outputSyclFile} PROPERTY HEADER_FILE_ONLY ON) + set(includedFile ${outputSyclFile}) + set(cppFile ${SDK_BUILD_IR_SOURCE}) + endif() + + # Force inclusion of the integration header for the host compiler + if(MSVC) + # Group SYCL files inside Visual Studio + source_group("SYCL" FILES ${outputSyclFile}) + + if(includeAfter) + # Allow the source file to be edited using Visual Studio. + # It will be added as a header file so it won't be compiled. + set_property(SOURCE ${SDK_BUILD_IR_SOURCE} PROPERTY HEADER_FILE_ONLY true) + endif() + + # Add both source and the sycl files to the VS solution. + target_sources(${SDK_BUILD_IR_TARGET} PUBLIC ${SDK_BUILD_IR_SOURCE} ${outputSyclFile}) + + set(forceIncludeFlags "/FI${includedFile} /TP") + else() + set(forceIncludeFlags "-include ${includedFile} -x c++") + endif() + + set_property( + SOURCE ${cppFile} + APPEND_STRING PROPERTY COMPILE_FLAGS "${forceIncludeFlags}" + ) + +endfunction(__build_ir) + +####################### +# add_sycl_to_target +####################### +# +# Adds a SYCL compilation custom command associated with an existing +# target and sets a dependancy on that new command. +# +# TARGET : Name of the target to add SYCL to. +# SOURCES : Source files to be compiled for SYCL. +# +function(add_sycl_to_target) + set(options) + set(one_value_args + TARGET + ) + set(multi_value_args + SOURCES + ) + cmake_parse_arguments(SDK_ADD_SYCL + "${options}" + "${one_value_args}" + "${multi_value_args}" + ${ARGN} + ) + + set_target_properties(${SDK_ADD_SYCL_TARGET} PROPERTIES LINKER_LANGUAGE CXX) + + # If the CXX compiler is set to compute++ enable the driver. + get_filename_component(cmakeCxxCompilerFileName "${CMAKE_CXX_COMPILER}" NAME) + if("${cmakeCxxCompilerFileName}" STREQUAL "compute++") + if(MSVC) + message(FATAL_ERROR "The compiler driver is not supported by this system, + revert the CXX compiler to your default host compiler.") + endif() + + get_target_property(includeAfter ${SDK_ADD_SYCL_TARGET} COMPUTECPP_INCLUDE_AFTER) + if(includeAfter) + list(APPEND COMPUTECPP_USER_FLAGS -fsycl-ih-last) + endif() + list(INSERT COMPUTECPP_DEVICE_COMPILER_FLAGS 0 -sycl-driver) + # Prepend COMPUTECPP_DEVICE_COMPILER_FLAGS and append COMPUTECPP_USER_FLAGS + foreach(prop COMPILE_OPTIONS INTERFACE_COMPILE_OPTIONS) + get_target_property(target_compile_options ${SDK_ADD_SYCL_TARGET} ${prop}) + if(NOT target_compile_options) + set(target_compile_options "") + endif() + set_property( + TARGET ${SDK_ADD_SYCL_TARGET} + PROPERTY ${prop} + ${COMPUTECPP_DEVICE_COMPILER_FLAGS} + ${target_compile_options} + ${COMPUTECPP_USER_FLAGS} + ) + endforeach() + else() + set(fileCounter 0) + list(INSERT COMPUTECPP_DEVICE_COMPILER_FLAGS 0 -sycl) + # Add custom target to run compute++ and generate the integration header + foreach(sourceFile ${SDK_ADD_SYCL_SOURCES}) + if(NOT IS_ABSOLUTE ${sourceFile}) + set(sourceFile "${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile}") + endif() + __build_ir( + TARGET ${SDK_ADD_SYCL_TARGET} + SOURCE ${sourceFile} + COUNTER ${fileCounter} + ) + MATH(EXPR fileCounter "${fileCounter} + 1") + endforeach() + endif() + + set_property(TARGET ${SDK_ADD_SYCL_TARGET} + APPEND PROPERTY LINK_LIBRARIES ComputeCpp::ComputeCpp) + set_property(TARGET ${SDK_ADD_SYCL_TARGET} + APPEND PROPERTY INTERFACE_LINK_LIBRARIES ComputeCpp::ComputeCpp) +endfunction(add_sycl_to_target) diff --git a/cmake/toolchains/arm-gcc-poky.cmake b/cmake/toolchains/arm-gcc-poky.cmake new file mode 100644 index 0000000..03824e9 --- /dev/null +++ b/cmake/toolchains/arm-gcc-poky.cmake @@ -0,0 +1,34 @@ +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR ARM64) +set(SDK_POKY_ROOT $ENV{SDK_POKY_ROOT}) + +if(NOT SDK_POKY_ROOT) + message(FATAL_ERROR + "Please set SDK_POKY_ROOT in the environment when crosscompiling.") +endif() + +set(COMPUTECPP_TARGET_TRIPLE aarch64-poky-linux) +set(COMPUTECPP_TOOLCHAIN_DIR ${SDK_POKY_ROOT}/x86_64-pokysdk-linux) +set(COMPUTECPP_SYSROOT_DIR ${SDK_POKY_ROOT}/aarch64-poky-linux) +# Adding this as the GCC toolchain makes compute++ not find headers +set(COMPUTECPP_DONT_USE_TOOLCHAIN ON) + +set(CMAKE_C_COMPILER "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-gcc" CACHE PATH "gcc") +set(CMAKE_CXX_COMPILER "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-g++" CACHE PATH "g++") +set(CMAKE_AR "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-ar" CACHE PATH "archive") +set(CMAKE_LINKER "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-ld" CACHE PATH "linker") +set(CMAKE_NM "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-nm" CACHE PATH "nm") +set(CMAKE_OBJCOPY "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-objcopy" CACHE PATH "objcopy") +set(CMAKE_OBJDUMP "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-objdump" CACHE PATH "objdump") +set(CMAKE_STRIP "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-strip" CACHE PATH "strip") +set(CMAKE_RANLIB "${COMPUTECPP_TOOLCHAIN_DIR}/usr/bin/${COMPUTECPP_TARGET_TRIPLE}/${COMPUTECPP_TARGET_TRIPLE}-ranlib" CACHE PATH "ranlib") + +set(CMAKE_FIND_ROOT_PATH ${COMPUTECPP_SYSROOT_DIR}) +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + +set(CMAKE_SYSROOT "${COMPUTECPP_SYSROOT_DIR}") + +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__aarch64__ --sysroot=${COMPUTECPP_SYSROOT_DIR}" CACHE INTERNAL "") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__aarch64__ --sysroot=${COMPUTECPP_SYSROOT_DIR}" CACHE INTERNAL "") + +set(CMAKE_CXX_LINK_EXECUTABLE " -o " CACHE INTERNAL "") diff --git a/cmake/toolchains/gcc-generic.cmake b/cmake/toolchains/gcc-generic.cmake new file mode 100644 index 0000000..3eef6ca --- /dev/null +++ b/cmake/toolchains/gcc-generic.cmake @@ -0,0 +1,18 @@ +set(CMAKE_SYSTEM_NAME Linux) +set(COMPUTECPP_SYSROOT_DIR $ENV{COMPUTECPP_SYSROOT_DIR}) +set(COMPUTECPP_TOOLCHAIN_DIR $ENV{COMPUTECPP_TOOLCHAIN_DIR}) +set(COMPUTECPP_TARGET_TRIPLE $ENV{COMPUTECPP_TARGET_TRIPLE}) + +if(NOT COMPUTECPP_SYSROOT_DIR OR + NOT COMPUTECPP_TOOLCHAIN_DIR OR + NOT COMPUTECPP_TARGET_TRIPLE +) + message(FATAL_ERROR + "Please set all of COMPUTECPP_TARGET_TRIPLE, COMPUTECPP_SYSROOT_DIR and " + "COMPUTECPP_TOOLCHAIN_DIR in the environment when crosscompiling.") +endif() + +set(CMAKE_SYSROOT ${COMPUTECPP_SYSROOT_DIR}) +set(CMAKE_C_COMPILER ${COMPUTECPP_TOOLCHAIN_DIR}/bin/${COMPUTECPP_TARGET_TRIPLE}-gcc) +set(CMAKE_CXX_COMPILER ${COMPUTECPP_TOOLCHAIN_DIR}/bin/${COMPUTECPP_TARGET_TRIPLE}-g++) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER) diff --git a/HC.make b/legacy/HC.make similarity index 89% rename from HC.make rename to legacy/HC.make index a79acf4..b902ada 100644 --- a/HC.make +++ b/legacy/HC.make @@ -13,7 +13,7 @@ CXXFLAGS+=-DNTILES=$(TBSIZE) endif -hc-stream: main.cpp HCStream.cpp +hc-stream: ../main.cpp HCStream.cpp $(HCC) $(CXXFLAGS) -DHC $^ $(LDFLAGS) $(EXTRA_FLAGS) -o $@ .PHONY: clean diff --git a/HCStream.cpp b/legacy/HCStream.cpp similarity index 100% rename from HCStream.cpp rename to legacy/HCStream.cpp diff --git a/HCStream.h b/legacy/HCStream.h similarity index 100% rename from HCStream.h rename to legacy/HCStream.h diff --git a/register_models.cmake b/register_models.cmake new file mode 100644 index 0000000..03424bc --- /dev/null +++ b/register_models.cmake @@ -0,0 +1,136 @@ + +#function(switch_expr INPUT OUTPUT) +# list(LENGTH ARGN N) +# math(EXPR EVEN "${N} % 2") +# if (NOT EVEN EQUAL 0) +# message(FATAL_ERROR "Expr must be a list of string pairs, ${EVEN}") +# endif () +# math(EXPR N_ "${N}-1") +# foreach (idx RANGE 0 ${N_} 2) +# math(EXPR KEY_IDX "${idx} + 0") +# math(EXPR VALUE_IDX "${idx} + 1") +# list(GET ARGN ${KEY_IDX} KEY) +# list(GET ARGN ${VALUE_IDX} VALUE) +# if (${KEY} STREQUAL ${INPUT}) +# set(${OUTPUT} ${VALUE} PARENT_SCOPE) +# break() +# endif () +# endforeach () +#endfunction() +# + +macro(register_link_library) + list(APPEND LINK_LIBRARIES ${ARGN}) +endmacro() + +macro(register_append_cxx_flags CONFIG) + if ("${CONFIG}" STREQUAL "RELEASE" OR "${CONFIG}" STREQUAL "ANY") + list(APPEND RELEASE_FLAGS ${ARGN}) + elseif ("${CONFIG}" STREQUAL "DEBUG" OR "${CONFIG}" STREQUAL "ANY") + list(APPEND DEBUG_FLAGS ${ARGN}) + else () + message(FATAL_ERROR "register_flags supports only RELEASE, DEBUG, or ANY for all configs, got `${CONFIG}`") + endif () +endmacro() + +macro(register_append_link_flags) + list(APPEND LINK_FLAGS ${ARGN}) +endmacro() + +macro(register_definitions) + list(APPEND IMPL_DEFINITIONS ${ARGN}) +endmacro() + +macro(register_flag_required NAME DESCRIPTION) + list(APPEND CUSTOM_FLAGS_TRIPLE "${NAME}" "${DESCRIPTION}" ON "") +endmacro() + +macro(register_flag_optional NAME DESCRIPTION DEFAULT) + list(APPEND CUSTOM_FLAGS_TRIPLE "${NAME}" "${DESCRIPTION}" OFF "${DEFAULT}") +endmacro() + +function(registered_flags_action ACTION OUT) + list(LENGTH CUSTOM_FLAGS_TRIPLE NFLAGS) + if (NOT NFLAGS EQUAL "0") + + if (${ACTION} STREQUAL "print") + set(LINE "Supported flags:\n\n") + elseif (${ACTION} STREQUAL "check") + set(LINE "Picked up model specific flags for this build:\n\n") + endif () + + + math(EXPR NFLAGS "${NFLAGS}-1") + foreach (idx RANGE 0 ${NFLAGS} 4) + math(EXPR NAME_IDX "${idx} + 0") + math(EXPR DESCRIPTION_IDX "${idx} + 1") + math(EXPR REQUIRED_IDX "${idx} + 2") + math(EXPR DEFAULT_VALUE_IDX "${idx} + 3") + list(GET CUSTOM_FLAGS_TRIPLE ${NAME_IDX} NAME) + list(GET CUSTOM_FLAGS_TRIPLE ${DESCRIPTION_IDX} DESCRIPTION) + list(GET CUSTOM_FLAGS_TRIPLE ${REQUIRED_IDX} REQUIRED) + list(GET CUSTOM_FLAGS_TRIPLE ${DEFAULT_VALUE_IDX} DEFAULT_VALUE) + if (${ACTION} STREQUAL "print") + if (${REQUIRED}) + set(DEFAULT_VALUE "(required)") + else () + set(DEFAULT_VALUE "(optional, default=${DEFAULT_VALUE})") + endif () + set(LINE "${LINE} ${NAME} ${DEFAULT_VALUE}: ${DESCRIPTION}\n") + elseif (${ACTION} STREQUAL "check") + if (${REQUIRED}) + # required flag + if (NOT DEFINED ${NAME}) + message(FATAL_ERROR "`${NAME}` is not set! (${DESCRIPTION})") + endif () + else () + # optional flag with default + if (NOT DEFINED ${NAME}) + set(${NAME} "${DEFAULT_VALUE}" PARENT_SCOPE) # setting PARENT_SCOPE does not affect local scope + set(${NAME} "${DEFAULT_VALUE}") + endif () + endif () + set(LINE "${LINE} ${NAME} = `${${NAME}}`\n") + else () + message(FATAL_ERROR "action `${ACTION}` not supported") + endif () + endforeach () + endif () + set(${OUT} "${LINE}" PARENT_SCOPE) +endfunction() + + + + +macro(register_model NAME PREPROCESSOR_NAME) + string(TOUPPER ${NAME} MODEL_UPPER) + list(APPEND REGISTERED_MODELS "${NAME}") + + list(APPEND IMPL_${MODEL_UPPER}_SOURCES "${ARGN}") + list(APPEND IMPL_${MODEL_UPPER}_DEFINITIONS "${PREPROCESSOR_NAME}") + +# if ("${MODEL_UPPER}" STREQUAL ${MODEL}) +# +# set(MODEL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${NAME}.cmake) +# +# if (NOT EXISTS ${MODEL_FILE}) +# message(FATAL_ERROR "${MODEL_FILE} not found, perhaps it needs to be implemented?") +# endif () +# include(${MODEL_FILE}) +# +# endif () +endmacro() + + +macro(load_model MODEL) + string(TOUPPER "${MODEL}" MODEL_UPPER) + if ("${MODEL_UPPER}" IN_LIST REGISTERED_MODELS) + set(MODEL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${MODEL_UPPER}.cmake) + if (NOT EXISTS ${MODEL_FILE}) + message(FATAL_ERROR "${MODEL_FILE} not found, perhaps it needs to be implemented?") + endif () + include(${MODEL_FILE}) + list(APPEND IMPL_SOURCES ${IMPL_${MODEL_UPPER}_SOURCES}) + list(APPEND IMPL_DEFINITIONS ${IMPL_${MODEL_UPPER}_DEFINITIONS}) + endif () +endmacro() \ No newline at end of file From cda17d7b306640560991751cc84cff2a63c7f073 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Mon, 8 Mar 2021 20:19:44 +0000 Subject: [PATCH 02/11] Add -march=native --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3e20485..5a85383 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,7 +72,7 @@ if (NOT DEFINED DEBUG_FLAGS) endif () if (NOT DEFINED RELEASE_FLAGS) # XXX switch on compiler type for the best default here, right now it's just GCC-compatible flags - set(RELEASE_FLAGS -O3) + set(RELEASE_FLAGS -O3 -march=native) endif () message(STATUS "Common ${CMAKE_BUILD_TYPE} flags are `${${BUILD_TYPE}_FLAGS}`, set ${BUILD_TYPE}_FLAGS to override") From 856822d6631728054ef022109a66e6eb8713ae8d Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Mon, 8 Mar 2021 20:31:23 +0000 Subject: [PATCH 03/11] Don't run the CI twice in an open PR --- .github/workflows/main.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 2df71cb..128e491 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -1,7 +1,6 @@ name: CI -on: - push: - pull_request: +on: [push, pull_request] + jobs: test: From 14aefecc57dd8aee0bdcb0f988bd411c921c88c6 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Thu, 11 Mar 2021 15:46:23 +0000 Subject: [PATCH 04/11] Re-add all compile and arch dependent flags Fix ACC not linking on CMake < 3.16 Fix CUDA warnings for CMP0104 and avoid repeated -O[n] flags Fix ComputeCpp not picking up custom flags [CI] Highlight compiler warnings [CI] Don't skip remaining tests when one fails [CI] Add CMake 3.13, 3.15, 3.18 checks --- .github/workflows/main.yaml | 91 ++++++++++++++++++++++++++++++------- ACC.cmake | 12 ++++- CMakeLists.txt | 40 +++++++++------- CUDA.cmake | 19 ++++++-- KOKKOS.cmake | 9 ++++ OMP.cmake | 55 ++++++++++++++++++++-- RAJA.cmake | 16 ++++++- README.md | 7 ++- SYCL.cmake | 4 +- ci-prepare-bionic.sh | 40 ++++++++++++++++ ci-test-compile.sh | 55 ++++++++++++++++------ register_models.cmake | 34 ++++++++------ 12 files changed, 307 insertions(+), 75 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 128e491..20e1034 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -21,19 +21,78 @@ jobs: - name: Setup test environment run: source ./ci-prepare-bionic.sh ./compilers VARS false || true - - name: Test compile gcc - run: ./ci-test-compile.sh ./build gcc all - - name: Test compile clang - run: ./ci-test-compile.sh ./build clang all - - name: Test compile nvhpc - run: ./ci-test-compile.sh ./build nvhpc all - - name: Test compile aocc - run: ./ci-test-compile.sh ./build aocc all - - name: Test compile aomp - run: ./ci-test-compile.sh ./build aomp all - - name: Test compile hip - run: ./ci-test-compile.sh ./build hip all - - name: Test compile dpcpp - run: ./ci-test-compile.sh ./build dpcpp all - - name: Test compile hipsycl - run: ./ci-test-compile.sh ./build hipsycl all + + - name: Test compile gcc @ CMake 3.13 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_13_BIN }} + - name: Test compile clang @ CMake 3.13 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_13_BIN }} + - name: Test compile nvhpc @ CMake 3.13 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_13_BIN }} + - name: Test compile aocc @ CMake 3.13 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_13_BIN }} + - name: Test compile aomp @ CMake 3.13 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_13_BIN }} + - name: Test compile hip @ CMake 3.13 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_13_BIN }} + - name: Test compile dpcpp @ CMake 3.13 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_13_BIN }} + - name: Test compile hipsycl @ CMake 3.13 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_13_BIN }} + + - name: Test compile gcc @ CMake 3.15 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_15_BIN }} + - name: Test compile clang @ CMake 3.15 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_15_BIN }} + - name: Test compile nvhpc @ CMake 3.15 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_15_BIN }} + - name: Test compile aocc @ CMake 3.15 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_15_BIN }} + - name: Test compile aomp @ CMake 3.15 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_15_BIN }} + - name: Test compile hip @ CMake 3.15 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_15_BIN }} + - name: Test compile dpcpp @ CMake 3.15 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_15_BIN }} + - name: Test compile hipsycl @ CMake 3.15 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_15_BIN }} + + - name: Test compile gcc @ CMake 3.18 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_18_BIN }} + - name: Test compile clang @ CMake 3.18 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_18_BIN }} + - name: Test compile nvhpc @ CMake 3.18 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_18_BIN }} + - name: Test compile aocc @ CMake 3.18 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_18_BIN }} + - name: Test compile aomp @ CMake 3.18 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_18_BIN }} + - name: Test compile hip @ CMake 3.18 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_18_BIN }} + - name: Test compile dpcpp @ CMake 3.18 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_18_BIN }} + - name: Test compile hipsycl @ CMake 3.18 + if: ${{ ! cancelled() }} + run: ./ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_18_BIN }} \ No newline at end of file diff --git a/ACC.cmake b/ACC.cmake index f951fae..86060d0 100644 --- a/ACC.cmake +++ b/ACC.cmake @@ -45,7 +45,17 @@ register_flag_optional(TARGET_PROCESSOR macro(setup) find_package(OpenACC REQUIRED) - register_link_library(OpenACC::OpenACC_CXX) + + if(${CMAKE_VERSION} VERSION_LESS "3.16.0") + # CMake didn't really implement ACC as a target before 3.16, so we append them manually + separate_arguments(OpenACC_CXX_FLAGS) + register_append_cxx_flags(ANY ${OpenACC_CXX_FLAGS}) + register_append_link_flags(${OpenACC_CXX_FLAGS}) + else() + register_link_library(OpenACC::OpenACC_CXX) + endif() + + register_definitions(restrict=__restrict) # XXX NVHPC is really new so older Cmake thinks it's PGI, which is true if ((CMAKE_CXX_COMPILER_ID STREQUAL PGI) OR (CMAKE_CXX_COMPILER_ID STREQUAL NVHPC)) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5a85383..ac45eda 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,18 +64,9 @@ if ((NOT BUILD_TYPE STREQUAL RELEASE) AND (NOT BUILD_TYPE STREQUAL DEBUG)) message(FATAL_ERROR "Only Release or Debug is supported, got `${CMAKE_BUILD_TYPE}`") endif () - -# setup common build flag defaults if there are no overrides -if (NOT DEFINED DEBUG_FLAGS) - # XXX switch on compiler type for the best default here, right now it's just GCC-compatible flags - set(DEBUG_FLAGS -O2 -fno-omit-frame-pointer) -endif () -if (NOT DEFINED RELEASE_FLAGS) - # XXX switch on compiler type for the best default here, right now it's just GCC-compatible flags - set(RELEASE_FLAGS -O3 -march=native) -endif () - -message(STATUS "Common ${CMAKE_BUILD_TYPE} flags are `${${BUILD_TYPE}_FLAGS}`, set ${BUILD_TYPE}_FLAGS to override") +# setup some defaults flags for everything +set(DEFAULT_DEBUG_FLAGS -O2 -fno-omit-frame-pointer) +set(DEFAULT_RELEASE_FLAGS -O3 -march=native) macro(hint_flag FLAG DESCRIPTION) if (NOT DEFINED ${FLAG}) @@ -146,13 +137,28 @@ setup() # CMake insists that -O2 (or equivalent) is the universally accepted optimisation level # we remove that here and use our own _FLAGS -string(REGEX REPLACE "([\\/\\-]O.)" "" - CMAKE_CXX_FLAGS_${BUILD_TYPE} "${CMAKE_CXX_FLAGS_${BUILD_TYPE}}") +wipe_gcc_style_optimisation_flags(CMAKE_CXX_FLAGS_${BUILD_TYPE}) + +message(STATUS "Default ${CMAKE_BUILD_TYPE} flags are `${DEFAULT_${BUILD_TYPE}_FLAGS}`, set ${BUILD_TYPE}_FLAGS to override (CXX_EXTRA_* flags are not affected)") + +# setup common build flag defaults if there are no overrides +if (NOT DEFINED ${BUILD_TYPE}_FLAGS) + set(ACTUAL_${BUILD_TYPE}_FLAGS ${DEFAULT_${BUILD_TYPE}_FLAGS}) + elseif() + set(ACTUAL_${BUILD_TYPE}_FLAGS ${${BUILD_TYPE}_FLAGS}) +endif () + message(STATUS "CXX vendor : ${CMAKE_CXX_COMPILER_ID} (${CMAKE_CXX_COMPILER})") +message(STATUS "Platform : ${CMAKE_SYSTEM_PROCESSOR}") message(STATUS "Sources : ${IMPL_SOURCES}") message(STATUS "Libraries : ${LINK_LIBRARIES}") -message(STATUS "CXX Flags : ${CMAKE_CXX_FLAGS_${BUILD_TYPE}} ${${BUILD_TYPE}_FLAGS} ${CXX_EXTRA_FLAGS} ") +message(STATUS "CXX Flags : ${CMAKE_CXX_FLAGS_${BUILD_TYPE}} ${ACTUAL_${BUILD_TYPE}_FLAGS} ${CXX_EXTRA_FLAGS} + CXX flags derived from (CMake + (Override ? Override : Default) + Extras), where: + CMake = `${CMAKE_CXX_FLAGS_${BUILD_TYPE}}` + Default = `${DEFAULT_${BUILD_TYPE}_FLAGS}` + Override (RELEASE_FLAGS) = `${${BUILD_TYPE}_FLAGS}` + Extras (CXX_EXTRA_FLAGS) = `${CXX_EXTRA_FLAGS}`") message(STATUS "Link Flags : ${LINK_FLAGS} ${CXX_EXTRA_LINK_FLAGS}") message(STATUS "Linker Flags: ${CMAKE_EXE_LINKER_FLAGS} ${CXX_EXTRA_LINKER_FLAGS} ") message(STATUS "Defs : ${IMPL_DEFINITIONS}") @@ -168,8 +174,8 @@ if (CXX_EXTRA_LIBRARIES) target_link_libraries(${EXE_NAME} PUBLIC ${CXX_EXTRA_LIBRARIES}) endif () -target_compile_options(${EXE_NAME} PUBLIC "$<$:${RELEASE_FLAGS};${CXX_EXTRA_FLAGS}>") -target_compile_options(${EXE_NAME} PUBLIC "$<$:${DEBUG_FLAGS};${CXX_EXTRA_FLAGS}>") +target_compile_options(${EXE_NAME} PUBLIC "$<$:${ACTUAL_RELEASE_FLAGS};${CXX_EXTRA_FLAGS}>") +target_compile_options(${EXE_NAME} PUBLIC "$<$:${ACTUAL_DEBUG_FLAGS};${CXX_EXTRA_FLAGS}>") target_link_options(${EXE_NAME} PUBLIC LINKER:${CXX_EXTRA_LINKER_FLAGS}) target_link_options(${EXE_NAME} PUBLIC ${LINK_FLAGS} ${CXX_EXTRA_LINK_FLAGS}) diff --git a/CUDA.cmake b/CUDA.cmake index 0fb804f..8c6b568 100644 --- a/CUDA.cmake +++ b/CUDA.cmake @@ -12,7 +12,7 @@ register_flag_optional(MEM "Device memory mode: register_flag_required(CMAKE_CUDA_COMPILER "Path to the CUDA nvcc compiler") -# XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes +# XXX we may want to drop this eventually and use CMAKE_CUDA_ARCHITECTURES directly register_flag_required(CUDA_ARCH "Nvidia architecture, will be passed in via `-arch=` (e.g `sm_70`) for nvcc") @@ -22,9 +22,22 @@ register_flag_optional(CUDA_EXTRA_FLAGS macro(setup) + + # XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes + if(POLICY CMP0104) + cmake_policy(SET CMP0104 OLD) + endif() + enable_language(CUDA) register_definitions(MEM=${MEM}) - set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} -arch=${CUDA_ARCH} ${CUDA_EXTRA_FLAGS}) - message(STATUS "NVCC flags: ${CMAKE_CUDA_FLAGS}") + + # add -forward-unknown-to-host-compiler for compatibility reasons + set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "-forward-unknown-to-host-compiler -arch=${CUDA_ARCH}" ${CUDA_EXTRA_FLAGS}) + + # CMake defaults to -O2 for CUDA at Release, let's wipe that and use the global RELEASE_FLAG + # appended later + wipe_gcc_style_optimisation_flags(CMAKE_CUDA_FLAGS_${BUILD_TYPE}) + + message(STATUS "NVCC flags: ${CMAKE_CUDA_FLAGS} ${CMAKE_CUDA_FLAGS_${BUILD_TYPE}}") endmacro() diff --git a/KOKKOS.cmake b/KOKKOS.cmake index 90abb19..9a9e54c 100644 --- a/KOKKOS.cmake +++ b/KOKKOS.cmake @@ -12,6 +12,9 @@ register_flag_required(KOKKOS_IN_TREE See https://github.com/kokkos/kokkos/blob/master/BUILD.md for all available options") +# compiler vendor and arch specific flags +set(KOKKOS_FLAGS_CPU_INTEL -qopt-streaming-stores=always) + macro(setup) cmake_policy(SET CMP0074 NEW) #see https://github.com/kokkos/kokkos/blob/master/BUILD.md @@ -25,6 +28,12 @@ macro(setup) message(FATAL_ERROR "`${KOKKOS_IN_TREE}` does not exist") endif () + register_append_compiler_and_arch_specific_cxx_flags( + KOKKOS_FLAGS_CPU + ${CMAKE_CXX_COMPILER_ID} + ${CMAKE_SYSTEM_PROCESSOR} + ) + endmacro() diff --git a/OMP.cmake b/OMP.cmake index 2d16b81..776c9b2 100644 --- a/OMP.cmake +++ b/OMP.cmake @@ -32,21 +32,51 @@ # NVHPC = NVIDIA HPC SDK Compiler (nvidia.com) +# CMAKE_SYSTEM_PROCESSOR is set via `uname -p`, we have: +# Power9 = ppc64le +# x64 = x86_64 +# arm64 = aarch64 +# + + #predefined offload flags based on compiler id -set(OMP_FLAGS_INTEL_CPU -qopt-streaming-stores=always) -set(OMP_FLAGS_OFFLOAD_INTEL -qnextgen -fiopenmp -fopenmp-targets=spir64) -set(OMP_FLAGS_OFFLOAD_GNU_NVIDIA -foffload=nvptx-none) -set(OMP_FLAGS_OFFLOAD_GNU_AMD -foffload=amdgcn-amdhsa) + + +set(OMP_FLAGS_OFFLOAD_INTEL + -qnextgen -fiopenmp -fopenmp-targets=spir64) +set(OMP_FLAGS_OFFLOAD_GNU_NVIDIA + -foffload=nvptx-none) +set(OMP_FLAGS_OFFLOAD_GNU_AMD + -foffload=amdgcn-amdhsa) set(OMP_FLAGS_OFFLOAD_CLANG_NVIDIA -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda) set(OMP_FLAGS_OFFLOAD_CLANG_AMD -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa) -set(OMP_FLAGS_OFFLOAD_CLANG_ARCH_FLAG -march=) +set(OMP_FLAGS_OFFLOAD_CLANG_ARCH_FLAG + -march=) # prefix only, arch appended by the vendor:arch tuple + + +set(OMP_FLAGS_CPU_INTEL + -qopt-streaming-stores=always) +set(OMP_FLAGS_CPU_GNU_PPC64LE + -mcpu=native) +set(OMP_FLAGS_CPU_XL + -O5 -qarch=auto -qtune=auto) + +# NEC +set(OMP_FLAGS_CPU_NEC -O4 -finline) register_flag_optional(CMAKE_CXX_COMPILER "Any CXX compiler that supports OpenMP as per CMake detection (and offloading if enabled with `OFFLOAD`)" "c++") +register_flag_optional(ARCH + "This overrides CMake's CMAKE_SYSTEM_PROCESSOR detection which uses (uname -p), this is mainly for use with + specialised accelerators only and not to be confused with offload which is is mutually exclusive with this. + Supported values are: + - NEC" + "") + register_flag_optional(OFFLOAD "Whether to use OpenMP offload, the format is |ON|OFF. We support a small set of known offload flags for clang, gcc, and icpx. @@ -75,10 +105,25 @@ macro(setup) register_link_library(OpenMP::OpenMP_CXX) string(TOUPPER ${CMAKE_CXX_COMPILER_ID} COMPILER) + if(NOT ARCH) + string(TOUPPER ${CMAKE_SYSTEM_PROCESSOR} ARCH) + else() + message(STATUS "Using custom arch: ${ARCH}") + endif() + + if (("${OFFLOAD}" STREQUAL OFF) OR (NOT DEFINED OFFLOAD)) # no offload + # resolve the CPU specific flags + # starting with ${COMPILER_VENDOR}_${PLATFORM_ARCH}, then try ${COMPILER_VENDOR}, and then give up + register_append_compiler_and_arch_specific_cxx_flags( + OMP_FLAGS_CPU + ${COMPILER} + ${ARCH} + ) + elseif ("${OFFLOAD}" STREQUAL ON) # offload but with custom flags register_definitions(OMP_TARGET_GPU) diff --git a/RAJA.cmake b/RAJA.cmake index 876896e..7e5f284 100644 --- a/RAJA.cmake +++ b/RAJA.cmake @@ -30,6 +30,8 @@ register_flag_optional(CUDA_EXTRA_FLAGS "[TARGET==NVIDIA only] Additional CUDA flags passed to nvcc, this is appended after `CUDA_ARCH`" "") +# compiler vendor and arch specific flags +set(RAJA_FLAGS_CPU_INTEL -qopt-streaming-stores=always) macro(setup) @@ -56,9 +58,15 @@ macro(setup) set(ENABLE_CUDA ${ENABLE_CUDA} CACHE BOOL "" FORCE) if (ENABLE_CUDA) + + # XXX CMake 3.18 supports CMAKE_CUDA_ARCHITECTURES/CUDA_ARCHITECTURES but we support older CMakes + if(POLICY CMP0104) + cmake_policy(SET CMP0104 OLD) + endif() + # RAJA needs all the cuda stuff setup before including! set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc) - set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "-extended-lambda -arch=${CUDA_ARCH}" ${CUDA_EXTRA_FLAGS}) + set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "-forward-unknown-to-host-compiler -extended-lambda -arch=${CUDA_ARCH}" ${CUDA_EXTRA_FLAGS}) list(APPEND CMAKE_CUDA_FLAGS) message(STATUS "NVCC flags: ${CMAKE_CUDA_FLAGS}") @@ -81,4 +89,10 @@ macro(setup) endif () + register_append_compiler_and_arch_specific_cxx_flags( + RAJA_FLAGS_CPU + ${CMAKE_CXX_COMPILER_ID} + ${CMAKE_SYSTEM_PROCESSOR} + ) + endmacro() diff --git a/README.md b/README.md index de8401e..9b3d775 100644 --- a/README.md +++ b/README.md @@ -59,16 +59,19 @@ Drivers, compiler and software applicable to whichever implementation you would ### CMake -The project supports building with CMake >= 3.13.0. +The project supports building with CMake >= 3.13.0, it can be installed without root via the [official script](https://cmake.org/download/). As with any CMake project, first configure the project: ```shell > cd babelstream -> cmake -Bbuild -H. -DMODEL= # configure the build, build type defaults to Release +> cmake -Bbuild -H. -DMODEL= # configure the build, build type defaults to Release > cmake --build build # compile it > ./build/babelstream # executable available at ./build/ ``` +By default, we have defined a set of optimal flags for known HPC compilers and assigned those to `RELEASE_FLAG`. +You can override this if required. + To find out what flag each model supports or requires, simply configure while only specifying the model. For example: ```shell diff --git a/SYCL.cmake b/SYCL.cmake index 1ff6596..c35f435 100644 --- a/SYCL.cmake +++ b/SYCL.cmake @@ -46,7 +46,6 @@ macro(setup) list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules) set(ComputeCpp_DIR ${SYCL_COMPILER_DIR}) - set(COMPUTECPP_USER_FLAGS -O3 -no-serial-memop) # don't point to the CL dir as the imports already have the CL prefix set(OpenCL_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/CL") @@ -55,6 +54,9 @@ macro(setup) # ComputeCpp needs OpenCL find_package(ComputeCpp REQUIRED) + # this must come after FindComputeCpp (!) + set(COMPUTECPP_USER_FLAGS -O3 -no-serial-memop) + elseif (${SYCL_COMPILER} STREQUAL "DPCPP") set(CMAKE_CXX_COMPILER ${SYCL_COMPILER_DIR}/bin/clang++) include_directories(${SYCL_COMPILER_DIR}/include/sycl) diff --git a/ci-prepare-bionic.sh b/ci-prepare-bionic.sh index 31b76ee..904ad8e 100755 --- a/ci-prepare-bionic.sh +++ b/ci-prepare-bionic.sh @@ -76,6 +76,17 @@ get_and_install_deb() { } +get() { + local name="$1" + local pkg_url="$2" + if [ "$SETUP" = true ]; then + if [ ! -f "$name" ] || [ "$FORCE_DOWNLOAD" = true ]; then + echo "$name not found, downloading..." + wget -q --show-progress --progress=bar:force:noscroll "$pkg_url" -O "$name" + fi + fi +} + get_and_untar() { local name="$1" local pkg_url="$2" @@ -306,9 +317,37 @@ else echo "Running locally, defaulting to standard export" fi +setup_cmake() { + + echo "Preparing CMake" + + local cmake_release="https://github.com/Kitware/CMake/releases/download" + + get "cmake-3.13.sh" "$cmake_release/v3.13.4/cmake-3.13.4-Linux-x86_64.sh" + chmod +x "./cmake-3.13.sh" && sh "./cmake-3.13.sh" --skip-license --include-subdir + export_var CMAKE_3_13_BIN "$PWD/cmake-3.13.4-Linux-x86_64/bin/cmake" + verify_bin_exists "$CMAKE_3_13_BIN" + "$CMAKE_3_13_BIN" --version + + get "cmake-3.15.sh" "$cmake_release/v3.15.7/cmake-3.15.7-Linux-x86_64.sh" + chmod +x "./cmake-3.15.sh" && "./cmake-3.15.sh" --skip-license --include-subdir + export_var CMAKE_3_15_BIN "$PWD/cmake-3.15.7-Linux-x86_64/bin/cmake" + verify_bin_exists "$CMAKE_3_15_BIN" + "$CMAKE_3_15_BIN" --version + + get "cmake-3.18.sh" "$cmake_release/v3.18.6/cmake-3.18.6-Linux-x86_64.sh" + chmod +x "./cmake-3.18.sh" && "./cmake-3.18.sh" --skip-license --include-subdir + export_var CMAKE_3_18_BIN "$PWD/cmake-3.18.6-Linux-x86_64/bin/cmake" + verify_bin_exists "$CMAKE_3_18_BIN" + "$CMAKE_3_18_BIN" --version + + check_size + +} if [ "$PARALLEL" = true ]; then (setup_clang_gcc && setup_rocm && setup_hipsycl) & # these need apt so run sequentially + setup_cmake & setup_oclcpu & setup_aocc & setup_nvhpc & @@ -318,6 +357,7 @@ if [ "$PARALLEL" = true ]; then setup_raja & wait else + setup_cmake setup_aocc setup_oclcpu setup_nvhpc diff --git a/ci-test-compile.sh b/ci-test-compile.sh index 9a22140..7a1b836 100755 --- a/ci-test-compile.sh +++ b/ci-test-compile.sh @@ -8,6 +8,7 @@ export CCACHE_DISABLE=1 BUILD_DIR=${1:-build} COMPILER=${2:-all} MODEL=${3:-all} +CMAKE_BIN=${4} LOG_DIR="$BUILD_DIR" @@ -45,13 +46,13 @@ run_build() { set +e # shellcheck disable=SC2086 - cmake -B"$build" -H. \ + "$CMAKE_BIN" -B"$build" -H. \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_VERBOSE_MAKEFILE=ON \ -DMODEL="$model" $flags &>>"$log" local cmake_code=$? - cmake --build "$build" --target babelstream -j "$(nproc)" &>>"$log" + "$CMAKE_BIN" --build "$build" --target babelstream -j "$(nproc)" &>>"$log" local cmake_code=$? set -e @@ -59,17 +60,19 @@ run_build() { if [[ -f "$bin" ]]; then echo "$(tput setaf 2)[PASS!]($model->$build)$(tput sgr0): -DMODEL=$model $flags" # shellcheck disable=SC2002 + cat "$log" | sed '/^--/d' | grep -i "/bin/nvcc" | sed 's/^/ /' cat "$log" | sed '/^--/d' | grep -i "$grep_kw" | sed 's/^/ /' + cat "$log" | sed '/^--/d' | grep -i "warning" | sed "s/.*/ $(tput setaf 3)&$(tput sgr0)/" else echo "$(tput setaf 1)[FAIL!]($model->$build)$(tput sgr0): -DMODEL=$model $flags" - echo " CMake exited with code $cmake_code, see full build log at $log, reproduced below:" + echo " $(tput setaf 1)CMake exited with code $cmake_code, see full build log at $log, reproduced below:$(tput sgr0)" cat "$log" exit 1 fi echo " $(tput setaf 4)$(file "$bin")$(tput sgr0)" } -## +### #KOKKOS_SRC="/home/tom/Downloads/kokkos-3.3.00" #RAJA_SRC="/home/tom/Downloads/RAJA-v0.13.0" # @@ -82,10 +85,6 @@ run_build() { #NVHPC_CUDA_DIR="$NVSDK/cuda/11.2" #"$NVSDK/compilers/bin/makelocalrc" "$NVSDK/compilers/bin/" -x # -##NVHPC_NVCXX="/opt/nvidia/hpc_sdk/Linux_x86_64/21.1/compilers/bin/nvc++" -##NVHPC_NVCC="/opt/nvidia/hpc_sdk/Linux_x86_64/21.1/cuda/11.2/bin/nvcc" -##NVHPC_CUDA_DIR="/opt/nvidia/hpc_sdk/Linux_x86_64/21.1/cuda/11.2" -# #AOCC_CXX="/opt/AMD/aocc-compiler-2.3.0/bin/clang++" #AOMP_CXX="/usr/lib/aomp/bin/clang++" #OCL_LIB="/home/tom/Downloads/oclcpuexp-2020.11.11.0.04_rel/x64/libOpenCL.so" @@ -98,13 +97,15 @@ run_build() { #HIPSYCL_DIR="/opt/hipsycl/cff515c/" # #ICPX_CXX="/opt/intel/oneapi/compiler/2021.1.2/linux/bin/icpx" +#ICPC_CXX="/opt/intel/oneapi/compiler/2021.1.2/linux/bin/intel64/icpc" # -#GCC_STD_PAR_LIB="" -#CLANG_STD_PAR_LIB="" +#GCC_STD_PAR_LIB="tbb" +#CLANG_STD_PAR_LIB="tbb" #GCC_OMP_OFFLOAD_AMD=false #GCC_OMP_OFFLOAD_NVIDIA=true #CLANG_OMP_OFFLOAD_AMD=false #CLANG_OMP_OFFLOAD_NVIDIA=false +### AMD_ARCH="gfx_903" NV_ARCH="sm_70" @@ -196,10 +197,26 @@ build_hip() { } build_icpx() { - source /opt/intel/oneapi/setvars.sh -force + # clang derived + set +u + source /opt/intel/oneapi/setvars.sh -force || true + set -u run_build intel_build "${ICPX_CXX:?}" OMP "-DCMAKE_CXX_COMPILER=${ICPX_CXX:?} -DOFFLOAD=INTEL" } +build_icpc() { + # icc/icpc + set +u + source /opt/intel/oneapi/setvars.sh -force || true + set -u + local name="intel_build" + local cxx="-DCMAKE_CXX_COMPILER=${ICPC_CXX:?}" + run_build $name "${ICPC_CXX:?}" OMP "$cxx" + run_build $name "${ICPC_CXX:?}" OCL "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" + run_build $name "${ICPC_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" + run_build $name "${ICPC_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" +} + build_computecpp() { run_build computecpp_build "compute++" SYCL "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \ -DSYCL_COMPILER=COMPUTECPP \ @@ -223,9 +240,8 @@ build_hipsycl() { -DSYCL_COMPILER_DIR=${HIPSYCL_DIR:?}" } -# TODO tested locally but can't install compilers for these two remotely without registration/license: -# build_icpx -# build_computecpp +echo "Test compiling with ${COMPILER} CXX for ${MODEL} model" +"$CMAKE_BIN" --version case "$COMPILER" in gcc) build_gcc ;; @@ -236,6 +252,12 @@ aomp) build_aomp ;; hip) build_hip ;; dpcpp) build_dpcpp ;; hipsycl) build_hipsycl ;; + +# XXX below are local only; licence or very large download required, candidate for local runner +computecpp) build_computecpp ;; +icpx) build_icpx ;; +icpc) build_icpc ;; + all) build_gcc build_clang @@ -245,6 +267,11 @@ all) build_hip build_dpcpp build_hipsycl + + build_computecpp + build_icpx + build_icpc + ;; *) echo "Unknown $COMPILER, use ALL to compile with all supported compilers" diff --git a/register_models.cmake b/register_models.cmake index 03424bc..300315e 100644 --- a/register_models.cmake +++ b/register_models.cmake @@ -19,15 +19,19 @@ #endfunction() # +macro(wipe_gcc_style_optimisation_flags VAR) + string(REGEX REPLACE "([\\/\\-]O.)" "" ${VAR} ${${VAR}}) +endmacro() + macro(register_link_library) list(APPEND LINK_LIBRARIES ${ARGN}) endmacro() macro(register_append_cxx_flags CONFIG) if ("${CONFIG}" STREQUAL "RELEASE" OR "${CONFIG}" STREQUAL "ANY") - list(APPEND RELEASE_FLAGS ${ARGN}) + list(APPEND DEFAULT_RELEASE_FLAGS ${ARGN}) elseif ("${CONFIG}" STREQUAL "DEBUG" OR "${CONFIG}" STREQUAL "ANY") - list(APPEND DEBUG_FLAGS ${ARGN}) + list(APPEND DEFAULT_DEBUG_FLAGS ${ARGN}) else () message(FATAL_ERROR "register_flags supports only RELEASE, DEBUG, or ANY for all configs, got `${CONFIG}`") endif () @@ -37,6 +41,19 @@ macro(register_append_link_flags) list(APPEND LINK_FLAGS ${ARGN}) endmacro() +macro(register_append_compiler_and_arch_specific_cxx_flags PREFIX CXX ARCH) + string(TOUPPER ${CXX} _CXX) + string(TOUPPER ${ARCH} _ARCH) + set(_CXX_ARCH_SPECIFIC_FLAGS "${${PREFIX}_${_CXX}_${_ARCH}}") + if (_CXX_ARCH_SPECIFIC_FLAGS) + register_append_cxx_flags(ANY ${_CXX_ARCH_SPECIFIC_FLAGS}) + endif () + set(_CXX_ARCH_SPECIFIC_FLAGS "${${PREFIX}_${_CXX}}") + if (_CXX_ARCH_SPECIFIC_FLAGS) + register_append_cxx_flags(ANY ${_CXX_ARCH_SPECIFIC_FLAGS}) + endif () +endmacro() + macro(register_definitions) list(APPEND IMPL_DEFINITIONS ${ARGN}) endmacro() @@ -100,25 +117,12 @@ function(registered_flags_action ACTION OUT) endfunction() - - macro(register_model NAME PREPROCESSOR_NAME) string(TOUPPER ${NAME} MODEL_UPPER) list(APPEND REGISTERED_MODELS "${NAME}") list(APPEND IMPL_${MODEL_UPPER}_SOURCES "${ARGN}") list(APPEND IMPL_${MODEL_UPPER}_DEFINITIONS "${PREPROCESSOR_NAME}") - -# if ("${MODEL_UPPER}" STREQUAL ${MODEL}) -# -# set(MODEL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${NAME}.cmake) -# -# if (NOT EXISTS ${MODEL_FILE}) -# message(FATAL_ERROR "${MODEL_FILE} not found, perhaps it needs to be implemented?") -# endif () -# include(${MODEL_FILE}) -# -# endif () endmacro() From fc6d032d7f9cc0df662d44c1b3b12741a424d8b6 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Tue, 23 Mar 2021 18:16:42 +0000 Subject: [PATCH 05/11] Use model name as exe prefix Drop C++17 requirement for OMP --- OMP.cmake | 2 -- ci-test-compile.sh | 9 ++++++--- register_models.cmake | 6 ++++++ 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/OMP.cmake b/OMP.cmake index 776c9b2..c8dde9f 100644 --- a/OMP.cmake +++ b/OMP.cmake @@ -99,8 +99,6 @@ register_flag_optional(OFFLOAD_APPEND_LINK_FLAG macro(setup) - set(CMAKE_CXX_STANDARD 17) - find_package(OpenMP REQUIRED) register_link_library(OpenMP::OpenMP_CXX) diff --git a/ci-test-compile.sh b/ci-test-compile.sh index 7a1b836..46046c4 100755 --- a/ci-test-compile.sh +++ b/ci-test-compile.sh @@ -50,13 +50,16 @@ run_build() { -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_VERBOSE_MAKEFILE=ON \ -DMODEL="$model" $flags &>>"$log" + local model_lower=$(echo "$model" | awk '{print tolower($0)}') + local cmake_code=$? - "$CMAKE_BIN" --build "$build" --target babelstream -j "$(nproc)" &>>"$log" + "$CMAKE_BIN" --build "$build" -j "$(nproc)" &>>"$log" local cmake_code=$? set -e - local bin="./$build/babelstream" + local bin="./$build/$model_lower-stream" + echo "Checking for final executable: $bin" if [[ -f "$bin" ]]; then echo "$(tput setaf 2)[PASS!]($model->$build)$(tput sgr0): -DMODEL=$model $flags" # shellcheck disable=SC2002 @@ -119,7 +122,7 @@ build_gcc() { if [ "$MODEL" = "all" ] || [ "$MODEL" = "OMP" ]; then # sanity check that it at least runs echo "Sanity checking GCC OMP build..." - "./$BUILD_DIR/OMP_$name/babelstream" -s 1048576 -n 10 + "./$BUILD_DIR/OMP_$name/omp-stream" -s 1048576 -n 10 fi # some distributions like Ubuntu bionic implements std par with TBB, so conditionally link it here diff --git a/register_models.cmake b/register_models.cmake index 300315e..a4f7a94 100644 --- a/register_models.cmake +++ b/register_models.cmake @@ -136,5 +136,11 @@ macro(load_model MODEL) include(${MODEL_FILE}) list(APPEND IMPL_SOURCES ${IMPL_${MODEL_UPPER}_SOURCES}) list(APPEND IMPL_DEFINITIONS ${IMPL_${MODEL_UPPER}_DEFINITIONS}) + + string(TOLOWER ${MODEL} MODEL_LOWER) + set(EXE_NAME ${MODEL_LOWER}-stream) + + else () + message(FATAL_ERROR "Unsupported model: ${MODEL}") endif () endmacro() \ No newline at end of file From 23f343fa15899d4ba2749afa11be4a6bd77a1b33 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Wed, 24 Mar 2021 17:20:11 +0000 Subject: [PATCH 06/11] Default to C++11 --- CMakeLists.txt | 2 ++ KOKKOS.cmake | 1 + STD20.cmake | 1 + 3 files changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index ac45eda..d46007c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,6 +53,8 @@ set(EXE_NAME babelstream) # select default build type set(CMAKE_BUILD_TYPE "Release") +# for chrono and some basic CXX features, models can overwrite this if required +set(CMAKE_CXX_STANDARD 11) if (NOT CMAKE_BUILD_TYPE) message("No CMAKE_BUILD_TYPE specified, defaulting to 'Release'") diff --git a/KOKKOS.cmake b/KOKKOS.cmake index 9a9e54c..445991d 100644 --- a/KOKKOS.cmake +++ b/KOKKOS.cmake @@ -17,6 +17,7 @@ set(KOKKOS_FLAGS_CPU_INTEL -qopt-streaming-stores=always) macro(setup) + set(CMAKE_CXX_STANDARD 14) cmake_policy(SET CMP0074 NEW) #see https://github.com/kokkos/kokkos/blob/master/BUILD.md message(STATUS "Building using in-tree Kokkos source at `${KOKKOS_IN_TREE}`") diff --git a/STD20.cmake b/STD20.cmake index 46a0c61..fd07387 100644 --- a/STD20.cmake +++ b/STD20.cmake @@ -10,6 +10,7 @@ macro(setup) # C++ 2a is too new, disable CMake's std flags completely: set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_STANDARD_REQUIRED OFF) + unset(CMAKE_CXX_STANDARD) # drop any existing standard we have set by default # and append our own: register_append_cxx_flags(ANY -std=c++2a) endmacro() From 7ec28e55f1527d8684981adae946b65e5b2b4900 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Tue, 30 Mar 2021 13:02:48 +0100 Subject: [PATCH 07/11] CMake: Update CI rocm to 4.1.0 --- ci-prepare-bionic.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci-prepare-bionic.sh b/ci-prepare-bionic.sh index 904ad8e..d8ae312 100755 --- a/ci-prepare-bionic.sh +++ b/ci-prepare-bionic.sh @@ -241,7 +241,7 @@ setup_rocm() { sudo apt-get update -qq sudo apt-get install -y -qq rocm-dev # AMD needs this rocm_path thing exported... - export_var ROCM_PATH "/opt/rocm-4.0.0" + export_var ROCM_PATH "/opt/rocm-4.1.0" export_var HIP_CXX "$ROCM_PATH/bin/hipcc" verify_bin_exists "$HIP_CXX" "$HIP_CXX" --version From 4d83bf4b180823bc209c600cab549e5d72c6deb1 Mon Sep 17 00:00:00 2001 From: Andrei Poenaru Date: Tue, 30 Mar 2021 16:54:48 +0300 Subject: [PATCH 08/11] Fix some README typos --- README.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 9b3d775..8ca7398 100644 --- a/README.md +++ b/README.md @@ -69,8 +69,8 @@ As with any CMake project, first configure the project: > ./build/babelstream # executable available at ./build/ ``` -By default, we have defined a set of optimal flags for known HPC compilers and assigned those to `RELEASE_FLAG`. -You can override this if required. +By default, we have defined a set of optimal flags for known HPC compilers. +There are assigned those to `RELEASE_FLAGS`, and you can override them if required. To find out what flag each model supports or requires, simply configure while only specifying the model. For example: @@ -80,15 +80,16 @@ For example: ... - Common Release flags are `-O3`, set RELEASE_FLAGS to override -- CXX_EXTRA_FLAGS: - Appends to common compile flags, this will be appended at link phase at well. + Appends to common compile flags. These will be used at link phase at well. To use separate flags at link time, set `CXX_EXTRA_LINKER_FLAGS` -- CXX_EXTRA_LINK_FLAGS: - Appends to link flags (i.e appended only at link phase) which appears *before* the objects. - Do not use this for linking libraries as the link line is order dependent + Appends to link flags which appear *before* the objects. + Do not use this for linking libraries, as the link line is order-dependent -- CXX_EXTRA_LIBRARIES: - Append to link flags which appears *after* the objects, use this for linking extra libraries (e.g `-lmylib`, or simply `mylib`) + Append to link flags which appears *after* the objects. + Use this for linking extra libraries (e.g `-lmylib`, or simply `mylib`) -- CXX_EXTRA_LINKER_FLAGS: - Append to linker flags (i.e GCC's -Wl or equivalent) + Append to linker flags (i.e GCC's `-Wl` or equivalent) -- Available models: OMP;OCL;STD;STD20;HIP;CUDA;KOKKOS;SYCL;ACC;RAJA -- Selected model : OCL -- Supported flags: @@ -97,9 +98,9 @@ For example: OpenCL_LIBRARY (optional, default=): Path to OpenCL library, usually called libOpenCL.so ... ``` -Alternatively, refer to the [CI script](./ci-test-compile.sh) which test-compiles most of the models and see which flags they use. +Alternatively, refer to the [CI script](./ci-test-compile.sh), which test-compiles most of the models, and see which flags are used there. -*It is recommended that you delete the `build` directory when you any of the build flags.* +*It is recommended that you delete the `build` directory when you change any of the build flags.* ### GNU Make From f279f58a72d0eb28b86d47be04b81006290c1d7d Mon Sep 17 00:00:00 2001 From: Andrei Poenaru Date: Tue, 30 Mar 2021 17:08:03 +0300 Subject: [PATCH 09/11] Improve CMake messages --- CMakeLists.txt | 14 ++++++-------- register_models.cmake | 4 ++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d46007c..17669a3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,15 +81,16 @@ endmacro() # hint common extra flag options for all models if they are not set hint_flag(CXX_EXTRA_FLAGS " - Appends to common compile flags, this will be appended at link phase as well. + Appends to common compile flags. These will be appended at link phase as well. To use separate flags at link phase, set `CXX_EXTRA_LINK_FLAGS`") hint_flag(CXX_EXTRA_LINK_FLAGS " - Appends to link flags (i.e appended only at link phase) which appears *before* the objects. - Do not use this for linking libraries as the link line is order dependent") + Appends to link flags which appear *before* the objects. + Do not use this for linking libraries, as the link line is order-dependent") hint_flag(CXX_EXTRA_LIBRARIES " - Append to link flags which appears *after* the objects, use this for linking extra libraries (e.g `-lmylib`, or simply `mylib`)") + Append to link flags which appear *after* the objects. + Use this for linking extra libraries (e.g `-lmylib`, or simply `mylib`)") hint_flag(CXX_EXTRA_LINKER_FLAGS " - Append to linker flags (i.e GCC's -Wl or equivalent)") + Append to linker flags (i.e GCC's `-Wl` or equivalent)") # copy CXX_EXTRA_FLAGS <- CXX_EXTRA_LINK_FLAGS if ((DEFINED CXX_EXTRA_FLAGS) AND (NOT DEFINED CXX_EXTRA_LINK_FLAGS)) @@ -187,6 +188,3 @@ target_link_options(${EXE_NAME} PUBLIC ${LINK_FLAGS} ${CXX_EXTRA_LINK_FLAGS}) if (COMMAND setup_target) setup_target(${EXE_NAME}) endif () - - - diff --git a/register_models.cmake b/register_models.cmake index a4f7a94..82e7243 100644 --- a/register_models.cmake +++ b/register_models.cmake @@ -73,7 +73,7 @@ function(registered_flags_action ACTION OUT) if (${ACTION} STREQUAL "print") set(LINE "Supported flags:\n\n") elseif (${ACTION} STREQUAL "check") - set(LINE "Picked up model specific flags for this build:\n\n") + set(LINE "Model-specific flags for this build:\n\n") endif () @@ -143,4 +143,4 @@ macro(load_model MODEL) else () message(FATAL_ERROR "Unsupported model: ${MODEL}") endif () -endmacro() \ No newline at end of file +endmacro() From a94d678b34895065a40efa9809fdb3608b36fe7f Mon Sep 17 00:00:00 2001 From: Tom Deakin Date: Thu, 8 Apr 2021 17:06:43 +0100 Subject: [PATCH 10/11] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8256774..ed38908 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file. - SYCL build rules for ComputeCpp, DPCPP and HipSYCL. - Support for CUDA Managed Memory and Page Fault memory. - Added nstream kernel from PRK with associate command line option. +- CMake build system added for all models. ### Changed - Default branch renamed from `master` to `main`. From 6ecd10bf98268586b6df0cf1f6bfb9c0ab2a3a20 Mon Sep 17 00:00:00 2001 From: Tom Deakin Date: Tue, 20 Apr 2021 16:16:56 +0100 Subject: [PATCH 11/11] Deprecate HC --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed38908..19c2a6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ All notable changes to this project will be documented in this file. - Ensure all OpenCL kernels are present in destructor. - Unified run function in driver code to reduce code duplication, output should be uneffected. - Normalise sum result by expected value to help false negative errors. +- HC version deprecated and moved to a legacy directory. ### Removed - Pre-building of kernels in SYCL version to ensure compatibility with SYCL 1.2.1.