From 717cc40c2c05f38b50c541742593bd7b66f42215 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Sun, 23 May 2021 07:21:21 +0100 Subject: [PATCH 01/10] Move all C++ impl. to ./cpp --- .github/workflows/main.yaml | 54 +++++++++---------- .gitignore | 24 +-------- README.md | 4 +- cpp/.gitignore | 30 +++++++++++ ACC.cmake => cpp/ACC.cmake | 0 ACCStream.cpp => cpp/ACCStream.cpp | 0 ACCStream.h => cpp/ACCStream.h | 0 {CL => cpp/CL}/cl.h | 0 {CL => cpp/CL}/cl2.hpp | 0 {CL => cpp/CL}/cl_d3d10.h | 0 {CL => cpp/CL}/cl_d3d11.h | 0 {CL => cpp/CL}/cl_dx9_media_sharing.h | 0 {CL => cpp/CL}/cl_dx9_media_sharing_intel.h | 0 {CL => cpp/CL}/cl_egl.h | 0 {CL => cpp/CL}/cl_ext.h | 0 {CL => cpp/CL}/cl_ext_intel.h | 0 {CL => cpp/CL}/cl_gl.h | 0 {CL => cpp/CL}/cl_gl_ext.h | 0 {CL => cpp/CL}/cl_half.h | 0 {CL => cpp/CL}/cl_icd.h | 0 {CL => cpp/CL}/cl_platform.h | 0 .../CL}/cl_va_api_media_sharing_intel.h | 0 {CL => cpp/CL}/cl_version.h | 0 {CL => cpp/CL}/opencl.h | 0 CMakeLists.txt => cpp/CMakeLists.txt | 0 CUDA.cmake => cpp/CUDA.cmake | 0 CUDA.make => cpp/CUDA.make | 0 CUDAStream.cu => cpp/CUDAStream.cu | 0 CUDAStream.h => cpp/CUDAStream.h | 0 HIP.cmake => cpp/HIP.cmake | 0 HIP.make => cpp/HIP.make | 0 HIPStream.cpp => cpp/HIPStream.cpp | 0 HIPStream.h => cpp/HIPStream.h | 0 KOKKOS.cmake => cpp/KOKKOS.cmake | 0 Kokkos.make => cpp/Kokkos.make | 0 KokkosStream.cpp => cpp/KokkosStream.cpp | 0 KokkosStream.hpp => cpp/KokkosStream.hpp | 0 LICENSE => cpp/LICENSE | 0 OCL.cmake => cpp/OCL.cmake | 0 OCLStream.cpp => cpp/OCLStream.cpp | 0 OCLStream.h => cpp/OCLStream.h | 0 OMP.cmake => cpp/OMP.cmake | 0 OMPStream.cpp => cpp/OMPStream.cpp | 0 OMPStream.h => cpp/OMPStream.h | 0 OpenACC.make => cpp/OpenACC.make | 0 OpenCL.make => cpp/OpenCL.make | 0 OpenMP.make => cpp/OpenMP.make | 0 RAJA.cmake => cpp/RAJA.cmake | 0 RAJA.make => cpp/RAJA.make | 0 RAJAStream.cpp => cpp/RAJAStream.cpp | 0 RAJAStream.hpp => cpp/RAJAStream.hpp | 0 STD.cmake => cpp/STD.cmake | 0 STD.make => cpp/STD.make | 0 STD20.cmake => 
cpp/STD20.cmake | 0 STD20.make => cpp/STD20.make | 0 STD20Stream.cpp => cpp/STD20Stream.cpp | 0 STD20Stream.hpp => cpp/STD20Stream.hpp | 0 STDStream.cpp => cpp/STDStream.cpp | 0 STDStream.h => cpp/STDStream.h | 0 SYCL.cmake => cpp/SYCL.cmake | 0 SYCL.make => cpp/SYCL.make | 0 SYCLStream.cpp => cpp/SYCLStream.cpp | 0 SYCLStream.h => cpp/SYCLStream.h | 0 Stream.h => cpp/Stream.h | 0 .../ci-prepare-bionic.sh | 0 ci-test-compile.sh => cpp/ci-test-compile.sh | 0 .../Modules/ComputeCppCompilerChecks.cmake | 0 .../cmake}/Modules/ComputeCppIRMap.cmake | 0 .../cmake}/Modules/FindComputeCpp.cmake | 0 .../cmake}/toolchains/arm-gcc-poky.cmake | 0 .../cmake}/toolchains/gcc-generic.cmake | 0 {legacy => cpp/legacy}/HC.make | 0 {legacy => cpp/legacy}/HCStream.cpp | 0 {legacy => cpp/legacy}/HCStream.h | 0 main.cpp => cpp/main.cpp | 0 .../register_models.cmake | 0 76 files changed, 60 insertions(+), 52 deletions(-) create mode 100644 cpp/.gitignore rename ACC.cmake => cpp/ACC.cmake (100%) rename ACCStream.cpp => cpp/ACCStream.cpp (100%) rename ACCStream.h => cpp/ACCStream.h (100%) rename {CL => cpp/CL}/cl.h (100%) rename {CL => cpp/CL}/cl2.hpp (100%) rename {CL => cpp/CL}/cl_d3d10.h (100%) rename {CL => cpp/CL}/cl_d3d11.h (100%) rename {CL => cpp/CL}/cl_dx9_media_sharing.h (100%) rename {CL => cpp/CL}/cl_dx9_media_sharing_intel.h (100%) rename {CL => cpp/CL}/cl_egl.h (100%) rename {CL => cpp/CL}/cl_ext.h (100%) rename {CL => cpp/CL}/cl_ext_intel.h (100%) rename {CL => cpp/CL}/cl_gl.h (100%) rename {CL => cpp/CL}/cl_gl_ext.h (100%) rename {CL => cpp/CL}/cl_half.h (100%) rename {CL => cpp/CL}/cl_icd.h (100%) rename {CL => cpp/CL}/cl_platform.h (100%) rename {CL => cpp/CL}/cl_va_api_media_sharing_intel.h (100%) rename {CL => cpp/CL}/cl_version.h (100%) rename {CL => cpp/CL}/opencl.h (100%) rename CMakeLists.txt => cpp/CMakeLists.txt (100%) rename CUDA.cmake => cpp/CUDA.cmake (100%) rename CUDA.make => cpp/CUDA.make (100%) rename CUDAStream.cu => cpp/CUDAStream.cu (100%) rename 
CUDAStream.h => cpp/CUDAStream.h (100%) rename HIP.cmake => cpp/HIP.cmake (100%) rename HIP.make => cpp/HIP.make (100%) rename HIPStream.cpp => cpp/HIPStream.cpp (100%) rename HIPStream.h => cpp/HIPStream.h (100%) rename KOKKOS.cmake => cpp/KOKKOS.cmake (100%) rename Kokkos.make => cpp/Kokkos.make (100%) rename KokkosStream.cpp => cpp/KokkosStream.cpp (100%) rename KokkosStream.hpp => cpp/KokkosStream.hpp (100%) rename LICENSE => cpp/LICENSE (100%) rename OCL.cmake => cpp/OCL.cmake (100%) rename OCLStream.cpp => cpp/OCLStream.cpp (100%) rename OCLStream.h => cpp/OCLStream.h (100%) rename OMP.cmake => cpp/OMP.cmake (100%) rename OMPStream.cpp => cpp/OMPStream.cpp (100%) rename OMPStream.h => cpp/OMPStream.h (100%) rename OpenACC.make => cpp/OpenACC.make (100%) rename OpenCL.make => cpp/OpenCL.make (100%) rename OpenMP.make => cpp/OpenMP.make (100%) rename RAJA.cmake => cpp/RAJA.cmake (100%) rename RAJA.make => cpp/RAJA.make (100%) rename RAJAStream.cpp => cpp/RAJAStream.cpp (100%) rename RAJAStream.hpp => cpp/RAJAStream.hpp (100%) rename STD.cmake => cpp/STD.cmake (100%) rename STD.make => cpp/STD.make (100%) rename STD20.cmake => cpp/STD20.cmake (100%) rename STD20.make => cpp/STD20.make (100%) rename STD20Stream.cpp => cpp/STD20Stream.cpp (100%) rename STD20Stream.hpp => cpp/STD20Stream.hpp (100%) rename STDStream.cpp => cpp/STDStream.cpp (100%) rename STDStream.h => cpp/STDStream.h (100%) rename SYCL.cmake => cpp/SYCL.cmake (100%) rename SYCL.make => cpp/SYCL.make (100%) rename SYCLStream.cpp => cpp/SYCLStream.cpp (100%) rename SYCLStream.h => cpp/SYCLStream.h (100%) rename Stream.h => cpp/Stream.h (100%) rename ci-prepare-bionic.sh => cpp/ci-prepare-bionic.sh (100%) rename ci-test-compile.sh => cpp/ci-test-compile.sh (100%) rename {cmake => cpp/cmake}/Modules/ComputeCppCompilerChecks.cmake (100%) rename {cmake => cpp/cmake}/Modules/ComputeCppIRMap.cmake (100%) rename {cmake => cpp/cmake}/Modules/FindComputeCpp.cmake (100%) rename {cmake => 
cpp/cmake}/toolchains/arm-gcc-poky.cmake (100%) rename {cmake => cpp/cmake}/toolchains/gcc-generic.cmake (100%) rename {legacy => cpp/legacy}/HC.make (100%) rename {legacy => cpp/legacy}/HCStream.cpp (100%) rename {legacy => cpp/legacy}/HCStream.h (100%) rename main.cpp => cpp/main.cpp (100%) rename register_models.cmake => cpp/register_models.cmake (100%) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 20e1034..33f6573 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -13,86 +13,86 @@ jobs: uses: actions/cache@v2 with: path: compilers - key: ${{ runner.os }}-${{ hashFiles('ci-prepare-bionic.sh') }} + key: ${{ runner.os }}-${{ hashFiles('./cpp/ci-prepare-bionic.sh') }} - name: Prepare compilers if: steps.prepare-compilers.outputs.cache-hit != 'true' - run: source ./ci-prepare-bionic.sh ./compilers SETUP true || true + run: source ./cpp/ci-prepare-bionic.sh ./compilers SETUP true || true - name: Setup test environment - run: source ./ci-prepare-bionic.sh ./compilers VARS false || true + run: source ./cpp/ci-prepare-bionic.sh ./compilers VARS false || true - name: Test compile gcc @ CMake 3.13 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_13_BIN }} + run: ./cpp/ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_13_BIN }} - name: Test compile clang @ CMake 3.13 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_13_BIN }} + run: ./cpp/ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_13_BIN }} - name: Test compile nvhpc @ CMake 3.13 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_13_BIN }} + run: ./cpp/ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_13_BIN }} - name: Test compile aocc @ CMake 3.13 if: ${{ ! 
cancelled() }} - run: ./ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_13_BIN }} + run: ./cpp/ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_13_BIN }} - name: Test compile aomp @ CMake 3.13 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_13_BIN }} + run: ./cpp/ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_13_BIN }} - name: Test compile hip @ CMake 3.13 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_13_BIN }} + run: ./cpp/ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_13_BIN }} - name: Test compile dpcpp @ CMake 3.13 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_13_BIN }} + run: ./cpp/ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_13_BIN }} - name: Test compile hipsycl @ CMake 3.13 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_13_BIN }} + run: ./cpp/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_13_BIN }} - name: Test compile gcc @ CMake 3.15 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_15_BIN }} + run: ./cpp/ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_15_BIN }} - name: Test compile clang @ CMake 3.15 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_15_BIN }} + run: ./cpp/ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_15_BIN }} - name: Test compile nvhpc @ CMake 3.15 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_15_BIN }} + run: ./cpp/ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_15_BIN }} - name: Test compile aocc @ CMake 3.15 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_15_BIN }} + run: ./cpp/ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_15_BIN }} - name: Test compile aomp @ CMake 3.15 if: ${{ ! 
cancelled() }} - run: ./ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_15_BIN }} + run: ./cpp/ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_15_BIN }} - name: Test compile hip @ CMake 3.15 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_15_BIN }} + run: ./cpp/ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_15_BIN }} - name: Test compile dpcpp @ CMake 3.15 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_15_BIN }} + run: ./cpp/ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_15_BIN }} - name: Test compile hipsycl @ CMake 3.15 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_15_BIN }} + run: ./cpp/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_15_BIN }} - name: Test compile gcc @ CMake 3.18 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_18_BIN }} + run: ./cpp/ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_18_BIN }} - name: Test compile clang @ CMake 3.18 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_18_BIN }} + run: ./cpp/ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_18_BIN }} - name: Test compile nvhpc @ CMake 3.18 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_18_BIN }} + run: ./cpp/ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_18_BIN }} - name: Test compile aocc @ CMake 3.18 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_18_BIN }} + run: ./cpp/ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_18_BIN }} - name: Test compile aomp @ CMake 3.18 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_18_BIN }} + run: ./cpp/ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_18_BIN }} - name: Test compile hip @ CMake 3.18 if: ${{ ! 
cancelled() }} - run: ./ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_18_BIN }} + run: ./cpp/ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_18_BIN }} - name: Test compile dpcpp @ CMake 3.18 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_18_BIN }} + run: ./cpp/ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_18_BIN }} - name: Test compile hipsycl @ CMake 3.18 if: ${{ ! cancelled() }} - run: ./ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_18_BIN }} \ No newline at end of file + run: ./cpp/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_18_BIN }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index c3ea1da..614eb0f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,30 +1,8 @@ -cuda-stream -ocl-stream -omp-stream -acc-stream -raja-stream -kokkos-stream -std-stream -sycl-stream -hip-stream - -*.o -*.bc -*.sycl -*.tar -*.gz -*.a - -KokkosCore_config.* - -.DS_Store - -Makefile build/ cmake-build-*/ CMakeFiles/ .idea/ .vscode/ -.directory \ No newline at end of file +.directory diff --git a/README.md b/README.md index 8ca7398..66ea661 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ The project supports building with CMake >= 3.13.0, it can be installed without As with any CMake project, first configure the project: ```shell -> cd babelstream +> cd babelstream/cpp > cmake -Bbuild -H. -DMODEL= # configure the build, build type defaults to Release > cmake --build build # compile it > ./build/babelstream # executable available at ./build/ @@ -75,7 +75,7 @@ There are assigned those to `RELEASE_FLAGS`, and you can override them if requir To find out what flag each model supports or requires, simply configure while only specifying the model. For example: ```shell -> cd babelstream +> cd babelstream/cpp > cmake -Bbuild -H. -DMODEL=OCL ... 
- Common Release flags are `-O3`, set RELEASE_FLAGS to override diff --git a/cpp/.gitignore b/cpp/.gitignore new file mode 100644 index 0000000..c3ea1da --- /dev/null +++ b/cpp/.gitignore @@ -0,0 +1,30 @@ + +cuda-stream +ocl-stream +omp-stream +acc-stream +raja-stream +kokkos-stream +std-stream +sycl-stream +hip-stream + +*.o +*.bc +*.sycl +*.tar +*.gz +*.a + +KokkosCore_config.* + +.DS_Store + +Makefile + +build/ +cmake-build-*/ +CMakeFiles/ +.idea/ +.vscode/ +.directory \ No newline at end of file diff --git a/ACC.cmake b/cpp/ACC.cmake similarity index 100% rename from ACC.cmake rename to cpp/ACC.cmake diff --git a/ACCStream.cpp b/cpp/ACCStream.cpp similarity index 100% rename from ACCStream.cpp rename to cpp/ACCStream.cpp diff --git a/ACCStream.h b/cpp/ACCStream.h similarity index 100% rename from ACCStream.h rename to cpp/ACCStream.h diff --git a/CL/cl.h b/cpp/CL/cl.h similarity index 100% rename from CL/cl.h rename to cpp/CL/cl.h diff --git a/CL/cl2.hpp b/cpp/CL/cl2.hpp similarity index 100% rename from CL/cl2.hpp rename to cpp/CL/cl2.hpp diff --git a/CL/cl_d3d10.h b/cpp/CL/cl_d3d10.h similarity index 100% rename from CL/cl_d3d10.h rename to cpp/CL/cl_d3d10.h diff --git a/CL/cl_d3d11.h b/cpp/CL/cl_d3d11.h similarity index 100% rename from CL/cl_d3d11.h rename to cpp/CL/cl_d3d11.h diff --git a/CL/cl_dx9_media_sharing.h b/cpp/CL/cl_dx9_media_sharing.h similarity index 100% rename from CL/cl_dx9_media_sharing.h rename to cpp/CL/cl_dx9_media_sharing.h diff --git a/CL/cl_dx9_media_sharing_intel.h b/cpp/CL/cl_dx9_media_sharing_intel.h similarity index 100% rename from CL/cl_dx9_media_sharing_intel.h rename to cpp/CL/cl_dx9_media_sharing_intel.h diff --git a/CL/cl_egl.h b/cpp/CL/cl_egl.h similarity index 100% rename from CL/cl_egl.h rename to cpp/CL/cl_egl.h diff --git a/CL/cl_ext.h b/cpp/CL/cl_ext.h similarity index 100% rename from CL/cl_ext.h rename to cpp/CL/cl_ext.h diff --git a/CL/cl_ext_intel.h b/cpp/CL/cl_ext_intel.h similarity index 100% rename from 
CL/cl_ext_intel.h rename to cpp/CL/cl_ext_intel.h diff --git a/CL/cl_gl.h b/cpp/CL/cl_gl.h similarity index 100% rename from CL/cl_gl.h rename to cpp/CL/cl_gl.h diff --git a/CL/cl_gl_ext.h b/cpp/CL/cl_gl_ext.h similarity index 100% rename from CL/cl_gl_ext.h rename to cpp/CL/cl_gl_ext.h diff --git a/CL/cl_half.h b/cpp/CL/cl_half.h similarity index 100% rename from CL/cl_half.h rename to cpp/CL/cl_half.h diff --git a/CL/cl_icd.h b/cpp/CL/cl_icd.h similarity index 100% rename from CL/cl_icd.h rename to cpp/CL/cl_icd.h diff --git a/CL/cl_platform.h b/cpp/CL/cl_platform.h similarity index 100% rename from CL/cl_platform.h rename to cpp/CL/cl_platform.h diff --git a/CL/cl_va_api_media_sharing_intel.h b/cpp/CL/cl_va_api_media_sharing_intel.h similarity index 100% rename from CL/cl_va_api_media_sharing_intel.h rename to cpp/CL/cl_va_api_media_sharing_intel.h diff --git a/CL/cl_version.h b/cpp/CL/cl_version.h similarity index 100% rename from CL/cl_version.h rename to cpp/CL/cl_version.h diff --git a/CL/opencl.h b/cpp/CL/opencl.h similarity index 100% rename from CL/opencl.h rename to cpp/CL/opencl.h diff --git a/CMakeLists.txt b/cpp/CMakeLists.txt similarity index 100% rename from CMakeLists.txt rename to cpp/CMakeLists.txt diff --git a/CUDA.cmake b/cpp/CUDA.cmake similarity index 100% rename from CUDA.cmake rename to cpp/CUDA.cmake diff --git a/CUDA.make b/cpp/CUDA.make similarity index 100% rename from CUDA.make rename to cpp/CUDA.make diff --git a/CUDAStream.cu b/cpp/CUDAStream.cu similarity index 100% rename from CUDAStream.cu rename to cpp/CUDAStream.cu diff --git a/CUDAStream.h b/cpp/CUDAStream.h similarity index 100% rename from CUDAStream.h rename to cpp/CUDAStream.h diff --git a/HIP.cmake b/cpp/HIP.cmake similarity index 100% rename from HIP.cmake rename to cpp/HIP.cmake diff --git a/HIP.make b/cpp/HIP.make similarity index 100% rename from HIP.make rename to cpp/HIP.make diff --git a/HIPStream.cpp b/cpp/HIPStream.cpp similarity index 100% rename from 
HIPStream.cpp rename to cpp/HIPStream.cpp diff --git a/HIPStream.h b/cpp/HIPStream.h similarity index 100% rename from HIPStream.h rename to cpp/HIPStream.h diff --git a/KOKKOS.cmake b/cpp/KOKKOS.cmake similarity index 100% rename from KOKKOS.cmake rename to cpp/KOKKOS.cmake diff --git a/Kokkos.make b/cpp/Kokkos.make similarity index 100% rename from Kokkos.make rename to cpp/Kokkos.make diff --git a/KokkosStream.cpp b/cpp/KokkosStream.cpp similarity index 100% rename from KokkosStream.cpp rename to cpp/KokkosStream.cpp diff --git a/KokkosStream.hpp b/cpp/KokkosStream.hpp similarity index 100% rename from KokkosStream.hpp rename to cpp/KokkosStream.hpp diff --git a/LICENSE b/cpp/LICENSE similarity index 100% rename from LICENSE rename to cpp/LICENSE diff --git a/OCL.cmake b/cpp/OCL.cmake similarity index 100% rename from OCL.cmake rename to cpp/OCL.cmake diff --git a/OCLStream.cpp b/cpp/OCLStream.cpp similarity index 100% rename from OCLStream.cpp rename to cpp/OCLStream.cpp diff --git a/OCLStream.h b/cpp/OCLStream.h similarity index 100% rename from OCLStream.h rename to cpp/OCLStream.h diff --git a/OMP.cmake b/cpp/OMP.cmake similarity index 100% rename from OMP.cmake rename to cpp/OMP.cmake diff --git a/OMPStream.cpp b/cpp/OMPStream.cpp similarity index 100% rename from OMPStream.cpp rename to cpp/OMPStream.cpp diff --git a/OMPStream.h b/cpp/OMPStream.h similarity index 100% rename from OMPStream.h rename to cpp/OMPStream.h diff --git a/OpenACC.make b/cpp/OpenACC.make similarity index 100% rename from OpenACC.make rename to cpp/OpenACC.make diff --git a/OpenCL.make b/cpp/OpenCL.make similarity index 100% rename from OpenCL.make rename to cpp/OpenCL.make diff --git a/OpenMP.make b/cpp/OpenMP.make similarity index 100% rename from OpenMP.make rename to cpp/OpenMP.make diff --git a/RAJA.cmake b/cpp/RAJA.cmake similarity index 100% rename from RAJA.cmake rename to cpp/RAJA.cmake diff --git a/RAJA.make b/cpp/RAJA.make similarity index 100% rename from RAJA.make rename 
to cpp/RAJA.make diff --git a/RAJAStream.cpp b/cpp/RAJAStream.cpp similarity index 100% rename from RAJAStream.cpp rename to cpp/RAJAStream.cpp diff --git a/RAJAStream.hpp b/cpp/RAJAStream.hpp similarity index 100% rename from RAJAStream.hpp rename to cpp/RAJAStream.hpp diff --git a/STD.cmake b/cpp/STD.cmake similarity index 100% rename from STD.cmake rename to cpp/STD.cmake diff --git a/STD.make b/cpp/STD.make similarity index 100% rename from STD.make rename to cpp/STD.make diff --git a/STD20.cmake b/cpp/STD20.cmake similarity index 100% rename from STD20.cmake rename to cpp/STD20.cmake diff --git a/STD20.make b/cpp/STD20.make similarity index 100% rename from STD20.make rename to cpp/STD20.make diff --git a/STD20Stream.cpp b/cpp/STD20Stream.cpp similarity index 100% rename from STD20Stream.cpp rename to cpp/STD20Stream.cpp diff --git a/STD20Stream.hpp b/cpp/STD20Stream.hpp similarity index 100% rename from STD20Stream.hpp rename to cpp/STD20Stream.hpp diff --git a/STDStream.cpp b/cpp/STDStream.cpp similarity index 100% rename from STDStream.cpp rename to cpp/STDStream.cpp diff --git a/STDStream.h b/cpp/STDStream.h similarity index 100% rename from STDStream.h rename to cpp/STDStream.h diff --git a/SYCL.cmake b/cpp/SYCL.cmake similarity index 100% rename from SYCL.cmake rename to cpp/SYCL.cmake diff --git a/SYCL.make b/cpp/SYCL.make similarity index 100% rename from SYCL.make rename to cpp/SYCL.make diff --git a/SYCLStream.cpp b/cpp/SYCLStream.cpp similarity index 100% rename from SYCLStream.cpp rename to cpp/SYCLStream.cpp diff --git a/SYCLStream.h b/cpp/SYCLStream.h similarity index 100% rename from SYCLStream.h rename to cpp/SYCLStream.h diff --git a/Stream.h b/cpp/Stream.h similarity index 100% rename from Stream.h rename to cpp/Stream.h diff --git a/ci-prepare-bionic.sh b/cpp/ci-prepare-bionic.sh similarity index 100% rename from ci-prepare-bionic.sh rename to cpp/ci-prepare-bionic.sh diff --git a/ci-test-compile.sh b/cpp/ci-test-compile.sh similarity index 
100% rename from ci-test-compile.sh rename to cpp/ci-test-compile.sh diff --git a/cmake/Modules/ComputeCppCompilerChecks.cmake b/cpp/cmake/Modules/ComputeCppCompilerChecks.cmake similarity index 100% rename from cmake/Modules/ComputeCppCompilerChecks.cmake rename to cpp/cmake/Modules/ComputeCppCompilerChecks.cmake diff --git a/cmake/Modules/ComputeCppIRMap.cmake b/cpp/cmake/Modules/ComputeCppIRMap.cmake similarity index 100% rename from cmake/Modules/ComputeCppIRMap.cmake rename to cpp/cmake/Modules/ComputeCppIRMap.cmake diff --git a/cmake/Modules/FindComputeCpp.cmake b/cpp/cmake/Modules/FindComputeCpp.cmake similarity index 100% rename from cmake/Modules/FindComputeCpp.cmake rename to cpp/cmake/Modules/FindComputeCpp.cmake diff --git a/cmake/toolchains/arm-gcc-poky.cmake b/cpp/cmake/toolchains/arm-gcc-poky.cmake similarity index 100% rename from cmake/toolchains/arm-gcc-poky.cmake rename to cpp/cmake/toolchains/arm-gcc-poky.cmake diff --git a/cmake/toolchains/gcc-generic.cmake b/cpp/cmake/toolchains/gcc-generic.cmake similarity index 100% rename from cmake/toolchains/gcc-generic.cmake rename to cpp/cmake/toolchains/gcc-generic.cmake diff --git a/legacy/HC.make b/cpp/legacy/HC.make similarity index 100% rename from legacy/HC.make rename to cpp/legacy/HC.make diff --git a/legacy/HCStream.cpp b/cpp/legacy/HCStream.cpp similarity index 100% rename from legacy/HCStream.cpp rename to cpp/legacy/HCStream.cpp diff --git a/legacy/HCStream.h b/cpp/legacy/HCStream.h similarity index 100% rename from legacy/HCStream.h rename to cpp/legacy/HCStream.h diff --git a/main.cpp b/cpp/main.cpp similarity index 100% rename from main.cpp rename to cpp/main.cpp diff --git a/register_models.cmake b/cpp/register_models.cmake similarity index 100% rename from register_models.cmake rename to cpp/register_models.cmake From 67a7447924b9fec5b1f34b1648dae6ade25ec13a Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Sun, 23 May 2021 07:36:08 +0100 Subject: [PATCH 02/10] Enter ./cpp first before CI 
tasks --- .github/workflows/main.yaml | 57 +++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 33f6573..173afed 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -3,7 +3,7 @@ on: [push, pull_request] jobs: - test: + "test_compile_cpp": runs-on: ubuntu-18.04 steps: - uses: actions/checkout@v2 @@ -15,84 +15,87 @@ jobs: path: compilers key: ${{ runner.os }}-${{ hashFiles('./cpp/ci-prepare-bionic.sh') }} + - name: Prepare compilers + run: cd ./cpp + - name: Prepare compilers if: steps.prepare-compilers.outputs.cache-hit != 'true' - run: source ./cpp/ci-prepare-bionic.sh ./compilers SETUP true || true + run: source ./ci-prepare-bionic.sh ./compilers SETUP true || true - name: Setup test environment - run: source ./cpp/ci-prepare-bionic.sh ./compilers VARS false || true + run: source ./ci-prepare-bionic.sh ./compilers VARS false || true - name: Test compile gcc @ CMake 3.13 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_13_BIN }} + run: ./ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_13_BIN }} - name: Test compile clang @ CMake 3.13 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_13_BIN }} + run: ./ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_13_BIN }} - name: Test compile nvhpc @ CMake 3.13 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_13_BIN }} + run: ./ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_13_BIN }} - name: Test compile aocc @ CMake 3.13 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_13_BIN }} + run: ./ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_13_BIN }} - name: Test compile aomp @ CMake 3.13 if: ${{ ! 
cancelled() }} - run: ./cpp/ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_13_BIN }} + run: ./ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_13_BIN }} - name: Test compile hip @ CMake 3.13 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_13_BIN }} + run: ./ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_13_BIN }} - name: Test compile dpcpp @ CMake 3.13 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_13_BIN }} + run: ./ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_13_BIN }} - name: Test compile hipsycl @ CMake 3.13 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_13_BIN }} + run: ./ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_13_BIN }} - name: Test compile gcc @ CMake 3.15 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_15_BIN }} + run: ./ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_15_BIN }} - name: Test compile clang @ CMake 3.15 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_15_BIN }} + run: ./ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_15_BIN }} - name: Test compile nvhpc @ CMake 3.15 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_15_BIN }} + run: ./ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_15_BIN }} - name: Test compile aocc @ CMake 3.15 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_15_BIN }} + run: ./ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_15_BIN }} - name: Test compile aomp @ CMake 3.15 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_15_BIN }} + run: ./ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_15_BIN }} - name: Test compile hip @ CMake 3.15 if: ${{ ! 
cancelled() }} - run: ./cpp/ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_15_BIN }} + run: ./ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_15_BIN }} - name: Test compile dpcpp @ CMake 3.15 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_15_BIN }} + run: ./ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_15_BIN }} - name: Test compile hipsycl @ CMake 3.15 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_15_BIN }} + run: ./ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_15_BIN }} - name: Test compile gcc @ CMake 3.18 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_18_BIN }} + run: ./ci-test-compile.sh ./build gcc all ${{ env.CMAKE_3_18_BIN }} - name: Test compile clang @ CMake 3.18 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_18_BIN }} + run: ./ci-test-compile.sh ./build clang all ${{ env.CMAKE_3_18_BIN }} - name: Test compile nvhpc @ CMake 3.18 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_18_BIN }} + run: ./ci-test-compile.sh ./build nvhpc all ${{ env.CMAKE_3_18_BIN }} - name: Test compile aocc @ CMake 3.18 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_18_BIN }} + run: ./ci-test-compile.sh ./build aocc all ${{ env.CMAKE_3_18_BIN }} - name: Test compile aomp @ CMake 3.18 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_18_BIN }} + run: ./ci-test-compile.sh ./build aomp all ${{ env.CMAKE_3_18_BIN }} - name: Test compile hip @ CMake 3.18 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_18_BIN }} + run: ./ci-test-compile.sh ./build hip all ${{ env.CMAKE_3_18_BIN }} - name: Test compile dpcpp @ CMake 3.18 if: ${{ ! 
cancelled() }} - run: ./cpp/ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_18_BIN }} + run: ./ci-test-compile.sh ./build dpcpp all ${{ env.CMAKE_3_18_BIN }} - name: Test compile hipsycl @ CMake 3.18 if: ${{ ! cancelled() }} - run: ./cpp/ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_18_BIN }} \ No newline at end of file + run: ./ci-test-compile.sh ./build hipsycl all ${{ env.CMAKE_3_18_BIN }} \ No newline at end of file From 60d4cb8c8763e772087309c19bf439f4f42fcdd1 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Sun, 23 May 2021 07:39:04 +0100 Subject: [PATCH 03/10] Enter ./cpp first before CI tasks, take 2 --- .github/workflows/main.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 173afed..43a036f 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -5,6 +5,9 @@ on: [push, pull_request] jobs: "test_compile_cpp": runs-on: ubuntu-18.04 + defaults: + run: + working-directory: ./cpp steps: - uses: actions/checkout@v2 @@ -15,9 +18,6 @@ jobs: path: compilers key: ${{ runner.os }}-${{ hashFiles('./cpp/ci-prepare-bionic.sh') }} - - name: Prepare compilers - run: cd ./cpp - - name: Prepare compilers if: steps.prepare-compilers.outputs.cache-hit != 'true' run: source ./ci-prepare-bionic.sh ./compilers SETUP true || true From 5318404249a4b1ac00059f2c08c784bfb28e2e02 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Wed, 26 May 2021 17:46:07 +0100 Subject: [PATCH 04/10] Use ./src instead of ./cpp Create subdir for each cpp-based implementation --- .github/workflows/main.yaml | 4 +- cpp/LICENSE => LICENSE | 0 README.md | 18 ++- cpp/.gitignore | 30 ---- {cpp/legacy => legacy}/HCStream.cpp | 0 {cpp/legacy => legacy}/HCStream.h | 0 cpp/legacy/HC.make => legacy/Makefile | 3 +- src/.gitignore | 29 ++++ {cpp => src}/CL/cl.h | 0 {cpp => src}/CL/cl2.hpp | 0 {cpp => src}/CL/cl_d3d10.h | 0 {cpp => src}/CL/cl_d3d11.h | 0 {cpp => src}/CL/cl_dx9_media_sharing.h | 0 
{cpp => src}/CL/cl_dx9_media_sharing_intel.h | 0 {cpp => src}/CL/cl_egl.h | 0 {cpp => src}/CL/cl_ext.h | 0 {cpp => src}/CL/cl_ext_intel.h | 0 {cpp => src}/CL/cl_gl.h | 0 {cpp => src}/CL/cl_gl_ext.h | 0 {cpp => src}/CL/cl_half.h | 0 {cpp => src}/CL/cl_icd.h | 0 {cpp => src}/CL/cl_platform.h | 0 .../CL/cl_va_api_media_sharing_intel.h | 0 {cpp => src}/CL/cl_version.h | 0 {cpp => src}/CL/opencl.h | 0 {cpp => src}/CMakeLists.txt | 64 ++------ {cpp => src}/Stream.h | 0 {cpp => src/acc}/ACCStream.cpp | 0 {cpp => src/acc}/ACCStream.h | 0 cpp/OpenACC.make => src/acc/Makefile | 4 +- cpp/ACC.cmake => src/acc/model.cmake | 0 {cpp => src}/ci-prepare-bionic.sh | 0 {cpp => src}/ci-test-compile.sh | 150 +++++++++--------- .../Modules/ComputeCppCompilerChecks.cmake | 0 .../cmake/Modules/ComputeCppIRMap.cmake | 0 .../cmake/Modules/FindComputeCpp.cmake | 0 .../cmake/toolchains/arm-gcc-poky.cmake | 0 .../cmake/toolchains/gcc-generic.cmake | 0 {cpp => src/cuda}/CUDAStream.cu | 0 {cpp => src/cuda}/CUDAStream.h | 0 cpp/CUDA.make => src/cuda/Makefile | 4 +- cpp/CUDA.cmake => src/cuda/model.cmake | 0 {cpp => src/hip}/HIPStream.cpp | 0 {cpp => src/hip}/HIPStream.h | 0 cpp/HIP.make => src/hip/Makefile | 4 +- cpp/HIP.cmake => src/hip/model.cmake | 0 {cpp => src/kokkos}/KokkosStream.cpp | 0 {cpp => src/kokkos}/KokkosStream.hpp | 0 cpp/Kokkos.make => src/kokkos/Makefile | 13 +- cpp/KOKKOS.cmake => src/kokkos/model.cmake | 0 {cpp => src}/main.cpp | 0 cpp/OpenCL.make => src/ocl/Makefile | 4 +- {cpp => src/ocl}/OCLStream.cpp | 0 {cpp => src/ocl}/OCLStream.h | 0 cpp/OCL.cmake => src/ocl/model.cmake | 0 cpp/OpenMP.make => src/omp/Makefile | 4 +- {cpp => src/omp}/OMPStream.cpp | 0 {cpp => src/omp}/OMPStream.h | 0 cpp/OMP.cmake => src/omp/model.cmake | 0 cpp/RAJA.make => src/raja/Makefile | 4 +- {cpp => src/raja}/RAJAStream.cpp | 0 {cpp => src/raja}/RAJAStream.hpp | 0 cpp/RAJA.cmake => src/raja/model.cmake | 0 {cpp => src}/register_models.cmake | 12 +- cpp/STD.make => src/std/Makefile | 4 +- {cpp => 
src/std}/STDStream.cpp | 0 {cpp => src/std}/STDStream.h | 0 cpp/STD.cmake => src/std/model.cmake | 0 cpp/STD20.make => src/std20/Makefile | 4 +- {cpp => src/std20}/STD20Stream.cpp | 0 {cpp => src/std20}/STD20Stream.hpp | 0 cpp/STD20.cmake => src/std20/model.cmake | 0 cpp/SYCL.make => src/sycl/Makefile | 4 +- {cpp => src/sycl}/SYCLStream.cpp | 0 {cpp => src/sycl}/SYCLStream.h | 0 cpp/SYCL.cmake => src/sycl/model.cmake | 0 76 files changed, 163 insertions(+), 196 deletions(-) rename cpp/LICENSE => LICENSE (100%) delete mode 100644 cpp/.gitignore rename {cpp/legacy => legacy}/HCStream.cpp (100%) rename {cpp/legacy => legacy}/HCStream.h (100%) rename cpp/legacy/HC.make => legacy/Makefile (79%) create mode 100644 src/.gitignore rename {cpp => src}/CL/cl.h (100%) rename {cpp => src}/CL/cl2.hpp (100%) rename {cpp => src}/CL/cl_d3d10.h (100%) rename {cpp => src}/CL/cl_d3d11.h (100%) rename {cpp => src}/CL/cl_dx9_media_sharing.h (100%) rename {cpp => src}/CL/cl_dx9_media_sharing_intel.h (100%) rename {cpp => src}/CL/cl_egl.h (100%) rename {cpp => src}/CL/cl_ext.h (100%) rename {cpp => src}/CL/cl_ext_intel.h (100%) rename {cpp => src}/CL/cl_gl.h (100%) rename {cpp => src}/CL/cl_gl_ext.h (100%) rename {cpp => src}/CL/cl_half.h (100%) rename {cpp => src}/CL/cl_icd.h (100%) rename {cpp => src}/CL/cl_platform.h (100%) rename {cpp => src}/CL/cl_va_api_media_sharing_intel.h (100%) rename {cpp => src}/CL/cl_version.h (100%) rename {cpp => src}/CL/opencl.h (100%) rename {cpp => src}/CMakeLists.txt (76%) rename {cpp => src}/Stream.h (100%) rename {cpp => src/acc}/ACCStream.cpp (100%) rename {cpp => src/acc}/ACCStream.h (100%) rename cpp/OpenACC.make => src/acc/Makefile (96%) rename cpp/ACC.cmake => src/acc/model.cmake (100%) rename {cpp => src}/ci-prepare-bionic.sh (100%) rename {cpp => src}/ci-test-compile.sh (61%) rename {cpp => src}/cmake/Modules/ComputeCppCompilerChecks.cmake (100%) rename {cpp => src}/cmake/Modules/ComputeCppIRMap.cmake (100%) rename {cpp => 
src}/cmake/Modules/FindComputeCpp.cmake (100%) rename {cpp => src}/cmake/toolchains/arm-gcc-poky.cmake (100%) rename {cpp => src}/cmake/toolchains/gcc-generic.cmake (100%) rename {cpp => src/cuda}/CUDAStream.cu (100%) rename {cpp => src/cuda}/CUDAStream.h (100%) rename cpp/CUDA.make => src/cuda/Makefile (88%) rename cpp/CUDA.cmake => src/cuda/model.cmake (100%) rename {cpp => src/hip}/HIPStream.cpp (100%) rename {cpp => src/hip}/HIPStream.h (100%) rename cpp/HIP.make => src/hip/Makefile (76%) rename cpp/HIP.cmake => src/hip/model.cmake (100%) rename {cpp => src/kokkos}/KokkosStream.cpp (100%) rename {cpp => src/kokkos}/KokkosStream.hpp (100%) rename cpp/Kokkos.make => src/kokkos/Makefile (85%) rename cpp/KOKKOS.cmake => src/kokkos/model.cmake (100%) rename {cpp => src}/main.cpp (100%) rename cpp/OpenCL.make => src/ocl/Makefile (85%) rename {cpp => src/ocl}/OCLStream.cpp (100%) rename {cpp => src/ocl}/OCLStream.h (100%) rename cpp/OCL.cmake => src/ocl/model.cmake (100%) rename cpp/OpenMP.make => src/omp/Makefile (95%) rename {cpp => src/omp}/OMPStream.cpp (100%) rename {cpp => src/omp}/OMPStream.h (100%) rename cpp/OMP.cmake => src/omp/model.cmake (100%) rename cpp/RAJA.make => src/raja/Makefile (92%) rename {cpp => src/raja}/RAJAStream.cpp (100%) rename {cpp => src/raja}/RAJAStream.hpp (100%) rename cpp/RAJA.cmake => src/raja/model.cmake (100%) rename {cpp => src}/register_models.cmake (93%) rename cpp/STD.make => src/std/Makefile (72%) rename {cpp => src/std}/STDStream.cpp (100%) rename {cpp => src/std}/STDStream.h (100%) rename cpp/STD.cmake => src/std/model.cmake (100%) rename cpp/STD20.make => src/std20/Makefile (76%) rename {cpp => src/std20}/STD20Stream.cpp (100%) rename {cpp => src/std20}/STD20Stream.hpp (100%) rename cpp/STD20.cmake => src/std20/model.cmake (100%) rename cpp/SYCL.make => src/sycl/Makefile (96%) rename {cpp => src/sycl}/SYCLStream.cpp (100%) rename {cpp => src/sycl}/SYCLStream.h (100%) rename cpp/SYCL.cmake => src/sycl/model.cmake (100%) 
diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 43a036f..5bed7b8 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-18.04 defaults: run: - working-directory: ./cpp + working-directory: ./src steps: - uses: actions/checkout@v2 @@ -16,7 +16,7 @@ jobs: uses: actions/cache@v2 with: path: compilers - key: ${{ runner.os }}-${{ hashFiles('./cpp/ci-prepare-bionic.sh') }} + key: ${{ runner.os }}-${{ hashFiles('./src/ci-prepare-bionic.sh') }} - name: Prepare compilers if: steps.prepare-compilers.outputs.cache-hit != 'true' diff --git a/cpp/LICENSE b/LICENSE similarity index 100% rename from cpp/LICENSE rename to LICENSE diff --git a/README.md b/README.md index 66ea661..3eaf9a5 100644 --- a/README.md +++ b/README.md @@ -63,20 +63,22 @@ The project supports building with CMake >= 3.13.0, it can be installed without As with any CMake project, first configure the project: ```shell -> cd babelstream/cpp +> cd babelstream/src > cmake -Bbuild -H. -DMODEL=<model> # configure the build, build type defaults to Release > cmake --build build # compile it -> ./build/babelstream # executable available at ./build/ +> ./build/<model>-stream # executable available at ./build/ ``` +Source for each model's implementations are located in `./src/<model>`. + By default, we have defined a set of optimal flags for known HPC compilers. There are assigned those to `RELEASE_FLAGS`, and you can override them if required. To find out what flag each model supports or requires, simply configure while only specifying the model. For example: ```shell -> cd babelstream/cpp -> cmake -Bbuild -H. -DMODEL=OCL +> cd babelstream/src +> cmake -Bbuild -H. -DMODEL=ocl ...
- Common Release flags are `-O3`, set RELEASE_FLAGS to override -- CXX_EXTRA_FLAGS: @@ -90,8 +92,8 @@ For example: Use this for linking extra libraries (e.g `-lmylib`, or simply `mylib`) -- CXX_EXTRA_LINKER_FLAGS: Append to linker flags (i.e GCC's `-Wl` or equivalent) --- Available models: OMP;OCL;STD;STD20;HIP;CUDA;KOKKOS;SYCL;ACC;RAJA --- Selected model : OCL +-- Available models: omp;ocl;std;std20;hip;cuda;kokkos;sycl;acc;raja +-- Selected model : ocl -- Supported flags: CMAKE_CXX_COMPILER (optional, default=c++): Any CXX compiler that is supported by CMake detection @@ -107,7 +109,7 @@ Alternatively, refer to the [CI script](./ci-test-compile.sh), which test-compil We have supplied a series of Makefiles, one for each programming model, to assist with building. The Makefiles contain common build options, and should be simple to customise for your needs too. -General usage is `make -f <Model>.make` +General usage is `make -C src/<model>` Common compiler flags and names can be set by passing a `COMPILER` option to Make, e.g. `make COMPILER=GNU`. Some models allow specifying a CPU or GPU style target, and this can be set by passing a `TARGET` option to Make, e.g. `make TARGET=GPU`. @@ -125,7 +127,7 @@ cd wget https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz tar -xvf 3.1.01.tar.gz # should end up with ~/kokkos-3.1.01 cd BabelStream -make -f Kokkos.make KOKKOS_PATH=~/kokkos-3.1.01 +make -C src/kokkos KOKKOS_PATH=~/kokkos-3.1.01 ``` See make output for more information on supported flags.
diff --git a/cpp/.gitignore b/cpp/.gitignore deleted file mode 100644 index c3ea1da..0000000 --- a/cpp/.gitignore +++ /dev/null @@ -1,30 +0,0 @@ - -cuda-stream -ocl-stream -omp-stream -acc-stream -raja-stream -kokkos-stream -std-stream -sycl-stream -hip-stream - -*.o -*.bc -*.sycl -*.tar -*.gz -*.a - -KokkosCore_config.* - -.DS_Store - -Makefile - -build/ -cmake-build-*/ -CMakeFiles/ -.idea/ -.vscode/ -.directory \ No newline at end of file diff --git a/cpp/legacy/HCStream.cpp b/legacy/HCStream.cpp similarity index 100% rename from cpp/legacy/HCStream.cpp rename to legacy/HCStream.cpp diff --git a/cpp/legacy/HCStream.h b/legacy/HCStream.h similarity index 100% rename from cpp/legacy/HCStream.h rename to legacy/HCStream.h diff --git a/cpp/legacy/HC.make b/legacy/Makefile similarity index 79% rename from cpp/legacy/HC.make rename to legacy/Makefile index b902ada..8047ae1 100644 --- a/cpp/legacy/HC.make +++ b/legacy/Makefile @@ -14,7 +14,8 @@ endif hc-stream: ../main.cpp HCStream.cpp - $(HCC) $(CXXFLAGS) -DHC $^ $(LDFLAGS) $(EXTRA_FLAGS) -o $@ + $(HCC) $(CXXFLAGS) -DHC $^ $(LDFLAGS) $(EXTRA_FLAGS) -o $@ -I. -I.. 
+ .PHONY: clean clean: diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..568a953 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,29 @@ + +**/cuda-stream +**/ocl-stream +**/omp-stream +**/acc-stream +**/raja-stream +**/kokkos-stream +**/std-stream +**/sycl-stream +**/hip-stream + +**/*.o +**/*.bc +**/*.sycl +**/*.tar +**/*.gz +**/*.a + +**/KokkosCore_Config_* + +**/.DS_Store + + +build/ +cmake-build-*/ +CMakeFiles/ +.idea/ +.vscode/ +.directory \ No newline at end of file diff --git a/cpp/CL/cl.h b/src/CL/cl.h similarity index 100% rename from cpp/CL/cl.h rename to src/CL/cl.h diff --git a/cpp/CL/cl2.hpp b/src/CL/cl2.hpp similarity index 100% rename from cpp/CL/cl2.hpp rename to src/CL/cl2.hpp diff --git a/cpp/CL/cl_d3d10.h b/src/CL/cl_d3d10.h similarity index 100% rename from cpp/CL/cl_d3d10.h rename to src/CL/cl_d3d10.h diff --git a/cpp/CL/cl_d3d11.h b/src/CL/cl_d3d11.h similarity index 100% rename from cpp/CL/cl_d3d11.h rename to src/CL/cl_d3d11.h diff --git a/cpp/CL/cl_dx9_media_sharing.h b/src/CL/cl_dx9_media_sharing.h similarity index 100% rename from cpp/CL/cl_dx9_media_sharing.h rename to src/CL/cl_dx9_media_sharing.h diff --git a/cpp/CL/cl_dx9_media_sharing_intel.h b/src/CL/cl_dx9_media_sharing_intel.h similarity index 100% rename from cpp/CL/cl_dx9_media_sharing_intel.h rename to src/CL/cl_dx9_media_sharing_intel.h diff --git a/cpp/CL/cl_egl.h b/src/CL/cl_egl.h similarity index 100% rename from cpp/CL/cl_egl.h rename to src/CL/cl_egl.h diff --git a/cpp/CL/cl_ext.h b/src/CL/cl_ext.h similarity index 100% rename from cpp/CL/cl_ext.h rename to src/CL/cl_ext.h diff --git a/cpp/CL/cl_ext_intel.h b/src/CL/cl_ext_intel.h similarity index 100% rename from cpp/CL/cl_ext_intel.h rename to src/CL/cl_ext_intel.h diff --git a/cpp/CL/cl_gl.h b/src/CL/cl_gl.h similarity index 100% rename from cpp/CL/cl_gl.h rename to src/CL/cl_gl.h diff --git a/cpp/CL/cl_gl_ext.h b/src/CL/cl_gl_ext.h similarity index 100% rename from cpp/CL/cl_gl_ext.h rename 
to src/CL/cl_gl_ext.h diff --git a/cpp/CL/cl_half.h b/src/CL/cl_half.h similarity index 100% rename from cpp/CL/cl_half.h rename to src/CL/cl_half.h diff --git a/cpp/CL/cl_icd.h b/src/CL/cl_icd.h similarity index 100% rename from cpp/CL/cl_icd.h rename to src/CL/cl_icd.h diff --git a/cpp/CL/cl_platform.h b/src/CL/cl_platform.h similarity index 100% rename from cpp/CL/cl_platform.h rename to src/CL/cl_platform.h diff --git a/cpp/CL/cl_va_api_media_sharing_intel.h b/src/CL/cl_va_api_media_sharing_intel.h similarity index 100% rename from cpp/CL/cl_va_api_media_sharing_intel.h rename to src/CL/cl_va_api_media_sharing_intel.h diff --git a/cpp/CL/cl_version.h b/src/CL/cl_version.h similarity index 100% rename from cpp/CL/cl_version.h rename to src/CL/cl_version.h diff --git a/cpp/CL/opencl.h b/src/CL/opencl.h similarity index 100% rename from cpp/CL/opencl.h rename to src/CL/opencl.h diff --git a/cpp/CMakeLists.txt b/src/CMakeLists.txt similarity index 76% rename from cpp/CMakeLists.txt rename to src/CMakeLists.txt index d4a11cd..e38130f 100644 --- a/cpp/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,46 +8,7 @@ project(BabelStream VERSION 3.5 LANGUAGES CXX) set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_STANDARD_REQUIRED ON) - -#set(MODEL SYCL) -#set(SYCL_COMPILER COMPUTECPP) -#set(SYCL_COMPILER_DIR /home/tom/Desktop/computecpp_archive/ComputeCpp-CE-2.3.0-x86_64-linux-gnu) -#set(MODEL RAJA) -#set(RAJA_IN_TREE /home/tom/Downloads/RAJA-v0.13.0/) -#set(ENABLE_CUDA ON) -#set(TARGET NVIDIA) -#set(CUDA_TOOLKIT_ROOT_DIR /opt/cuda-11.2) -#set(CUDA_ARCH sm_70) -#set(BLT_DIR /home/tom/Downloads/blt-0.3.6/) - -#set(MODEL STD) -#set(ARCH cc70) -#set(CXX_EXTRA_FLAGS -v) - -#set(MODEL CUDA) -#set(ARCH sm_70) -#set(CMAKE_CUDA_COMPILER /opt/cuda-11.2/bin/nvcc) - -#set(MODEL OCL) -#set(OpenCL_LIBRARY /opt/rocm-4.0.0/opencl/lib/libOpenCL.so) -#set(OpenCL_INCLUDE_DIR /opt/rocm-4.0.0/opencl/lib) -#set(RELEASE_FLAGS -Ofast) -#set(CXX_EXTRA_FLAGS -O2) - -#set(CMAKE_CXX_COMPILER 
/usr/lib/aomp/bin/clang++) -#set(MODEL OMP) -##set(OFFLOAD "AMD:gfx803") -#set(OFFLOAD "NVIDIA:sm_35") -#set(CXX_EXTRA_FLAGS --cuda-path=/opt/cuda-10.2/) - -#set(OFFLOAD "AMD:_70") -#set(CXX_EXTRA_FLAGS --cuda-path=/opt/cuda-10.2/ --gcc-toolchain=/home/tom/spack/opt/spack/linux-fedora33-zen2/gcc-10.2.1/gcc-8.3.0-latmjo2hl2yv53255xkwko7k3y7bx2vv) -#set(CXX_EXTRA_LINKER_FLAGS ) -#set(MODEL HIP) - -#set(MODEL KOKKOS) -#set(KOKKOS_IN_TREE /home/tom/Downloads/kokkos-3.3.00/) - + # the final executable name set(EXE_NAME babelstream) @@ -101,17 +62,17 @@ endif () include(register_models.cmake) # register out models -register_model(OMP OMP OMPStream.cpp) -register_model(OCL OCL OCLStream.cpp) -register_model(STD STD STDStream.cpp) -register_model(STD20 STD20 STD20Stream.cpp) -register_model(HIP HIP HIPStream.cpp) -register_model(CUDA CUDA CUDAStream.cu) -register_model(KOKKOS KOKKOS KokkosStream.cpp) -register_model(SYCL SYCL SYCLStream.cpp) -register_model(ACC ACC ACCStream.cpp) +register_model(omp OMP OMPStream.cpp) +register_model(ocl OCL OCLStream.cpp) +register_model(std STD STDStream.cpp) +register_model(std20 STD20 STD20Stream.cpp) +register_model(hip HIP HIPStream.cpp) +register_model(cuda CUDA CUDAStream.cu) +register_model(kokkos KOKKOS KokkosStream.cpp) +register_model(sycl SYCL SYCLStream.cpp) +register_model(acc ACC ACCStream.cpp) # defining RAJA collides with the RAJA namespace so USE_RAJA -register_model(RAJA USE_RAJA RAJAStream.cpp) +register_model(raja USE_RAJA RAJAStream.cpp) set(USAGE ON CACHE BOOL "Whether to print all custom flags for the selected model") @@ -169,6 +130,7 @@ message(STATUS "Executable : ${EXE_NAME}") # below we have all the usual CMake target setup steps +include_directories(.) 
add_executable(${EXE_NAME} ${IMPL_SOURCES} main.cpp) target_link_libraries(${EXE_NAME} PUBLIC ${LINK_LIBRARIES}) target_compile_definitions(${EXE_NAME} PUBLIC ${IMPL_DEFINITIONS}) @@ -184,7 +146,7 @@ target_link_options(${EXE_NAME} PUBLIC LINKER:${CXX_EXTRA_LINKER_FLAGS}) target_link_options(${EXE_NAME} PUBLIC ${LINK_FLAGS} ${CXX_EXTRA_LINK_FLAGS}) # some models require the target to be already specified so they can finish their setup here -# this only happens if the MODEL.cmake definition contains the `setup_target` macro +# this only happens if the model.cmake definition contains the `setup_target` macro if (COMMAND setup_target) setup_target(${EXE_NAME}) endif () diff --git a/cpp/Stream.h b/src/Stream.h similarity index 100% rename from cpp/Stream.h rename to src/Stream.h diff --git a/cpp/ACCStream.cpp b/src/acc/ACCStream.cpp similarity index 100% rename from cpp/ACCStream.cpp rename to src/acc/ACCStream.cpp diff --git a/cpp/ACCStream.h b/src/acc/ACCStream.h similarity index 100% rename from cpp/ACCStream.h rename to src/acc/ACCStream.h diff --git a/cpp/OpenACC.make b/src/acc/Makefile similarity index 96% rename from cpp/OpenACC.make rename to src/acc/Makefile index 7a75fd0..55fbfbd 100644 --- a/cpp/OpenACC.make +++ b/src/acc/Makefile @@ -50,8 +50,8 @@ endif FLAGS_GNU = -O3 -std=c++11 -Drestrict=__restrict -fopenacc CXXFLAGS = $(FLAGS_$(COMPILER)) -acc-stream: main.cpp ACCStream.cpp - $(COMPILER_$(COMPILER)) $(CXXFLAGS) -DACC $^ $(EXTRA_FLAGS) -o $@ +acc-stream: ../main.cpp ACCStream.cpp + $(COMPILER_$(COMPILER)) $(CXXFLAGS) -DACC $^ $(EXTRA_FLAGS) -o $@ -I. -I.. 
.PHONY: clean clean: diff --git a/cpp/ACC.cmake b/src/acc/model.cmake similarity index 100% rename from cpp/ACC.cmake rename to src/acc/model.cmake diff --git a/cpp/ci-prepare-bionic.sh b/src/ci-prepare-bionic.sh similarity index 100% rename from cpp/ci-prepare-bionic.sh rename to src/ci-prepare-bionic.sh diff --git a/cpp/ci-test-compile.sh b/src/ci-test-compile.sh similarity index 61% rename from cpp/ci-test-compile.sh rename to src/ci-test-compile.sh index 1b5c1bb..85efd39 100755 --- a/cpp/ci-test-compile.sh +++ b/src/ci-test-compile.sh @@ -86,38 +86,38 @@ run_build() { } ### -#KOKKOS_SRC="/home/tom/Downloads/kokkos-3.3.00" -#RAJA_SRC="/home/tom/Downloads/RAJA-v0.13.0" -# -#GCC_CXX="/usr/bin/g++" -#CLANG_CXX="/usr/bin/clang++" -# -#NVSDK="/home/tom/Downloads/nvhpc_2021_212_Linux_x86_64_cuda_11.2/install_components/Linux_x86_64/21.2/" -#NVHPC_NVCXX="$NVSDK/compilers/bin/nvc++" -#NVHPC_NVCC="$NVSDK/cuda/11.2/bin/nvcc" -#NVHPC_CUDA_DIR="$NVSDK/cuda/11.2" -#"$NVSDK/compilers/bin/makelocalrc" "$NVSDK/compilers/bin/" -x -# -#AOCC_CXX="/opt/AMD/aocc-compiler-2.3.0/bin/clang++" -#AOMP_CXX="/usr/lib/aomp/bin/clang++" -#OCL_LIB="/home/tom/Downloads/oclcpuexp-2020.11.11.0.04_rel/x64/libOpenCL.so" -# -## AMD needs this rocm_path thing exported... 
-#export ROCM_PATH="/opt/rocm-4.0.0" -#HIP_CXX="/opt/rocm-4.0.0/bin/hipcc" -#COMPUTECPP_DIR="/home/tom/Desktop/computecpp_archive/ComputeCpp-CE-2.3.0-x86_64-linux-gnu" -#DPCPP_DIR="/home/tom/Downloads/dpcpp_compiler" -#HIPSYCL_DIR="/opt/hipsycl/cff515c/" -# -#ICPX_CXX="/opt/intel/oneapi/compiler/2021.1.2/linux/bin/icpx" -#ICPC_CXX="/opt/intel/oneapi/compiler/2021.1.2/linux/bin/intel64/icpc" -# -#GCC_STD_PAR_LIB="tbb" -#CLANG_STD_PAR_LIB="tbb" -#GCC_OMP_OFFLOAD_AMD=false -#GCC_OMP_OFFLOAD_NVIDIA=true -#CLANG_OMP_OFFLOAD_AMD=false -#CLANG_OMP_OFFLOAD_NVIDIA=false +# KOKKOS_SRC="/home/tom/Downloads/kokkos-3.3.00" +# RAJA_SRC="/home/tom/Downloads/RAJA-v0.13.0" + +# GCC_CXX="/usr/bin/g++" +# CLANG_CXX="/usr/bin/clang++" + +# NVSDK="/home/tom/Downloads/nvhpc_2021_212_Linux_x86_64_cuda_11.2/install_components/Linux_x86_64/21.2/" +# NVHPC_NVCXX="$NVSDK/compilers/bin/nvc++" +# NVHPC_NVCC="$NVSDK/cuda/11.2/bin/nvcc" +# NVHPC_CUDA_DIR="$NVSDK/cuda/11.2" +# "$NVSDK/compilers/bin/makelocalrc" "$NVSDK/compilers/bin/" -x + +# AOCC_CXX="/opt/AMD/aocc-compiler-2.3.0/bin/clang++" +# AOMP_CXX="/usr/lib/aomp/bin/clang++" +# OCL_LIB="/home/tom/Downloads/oclcpuexp-2020.11.11.0.04_rel/x64/libOpenCL.so" + +# # AMD needs this rocm_path thing exported... 
+# export ROCM_PATH="/opt/rocm-4.0.0" +# HIP_CXX="/opt/rocm-4.0.0/bin/hipcc" +# COMPUTECPP_DIR="/home/tom/Desktop/computecpp_archive/ComputeCpp-CE-2.3.0-x86_64-linux-gnu" +# DPCPP_DIR="/home/tom/Downloads/dpcpp_compiler" +# HIPSYCL_DIR="/opt/hipsycl/cff515c/" + +# ICPX_CXX="/opt/intel/oneapi/compiler/2021.1.2/linux/bin/icpx" +# ICPC_CXX="/opt/intel/oneapi/compiler/2021.1.2/linux/bin/intel64/icpc" + +# GCC_STD_PAR_LIB="tbb" +# CLANG_STD_PAR_LIB="tbb" +# GCC_OMP_OFFLOAD_AMD=false +# GCC_OMP_OFFLOAD_NVIDIA=true +# CLANG_OMP_OFFLOAD_AMD=false +# CLANG_OMP_OFFLOAD_NVIDIA=false ### AMD_ARCH="gfx_903" @@ -128,39 +128,39 @@ build_gcc() { local name="gcc_build" local cxx="-DCMAKE_CXX_COMPILER=${GCC_CXX:?}" - run_build $name "${GCC_CXX:?}" OMP "$cxx" + run_build $name "${GCC_CXX:?}" omp "$cxx" if [ "$MODEL" = "all" ] || [ "$MODEL" = "OMP" ]; then # sanity check that it at least runs - echo "Sanity checking GCC OMP build..." - "./$BUILD_DIR/OMP_$name/omp-stream" -s 1048576 -n 10 + echo "Sanity checking GCC omp build..." 
+ "./$BUILD_DIR/omp_$name/omp-stream" -s 1048576 -n 10 fi # some distributions like Ubuntu bionic implements std par with TBB, so conditionally link it here - run_build $name "${GCC_CXX:?}" STD "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}" - run_build $name "${GCC_CXX:?}" STD20 "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}" + run_build $name "${GCC_CXX:?}" std "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}" + run_build $name "${GCC_CXX:?}" std20 "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}" if [ "${GCC_OMP_OFFLOAD_AMD:-false}" != "false" ]; then - run_build "amd_$name" "${GCC_CXX:?}" ACC "$cxx -DCXX_EXTRA_FLAGS=-foffload=amdgcn-amdhsa" - run_build "amd_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=AMD:$AMD_ARCH" + run_build "amd_$name" "${GCC_CXX:?}" acc "$cxx -DCXX_EXTRA_FLAGS=-foffload=amdgcn-amdhsa" + run_build "amd_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=AMD:$AMD_ARCH" fi if [ "${GCC_OMP_OFFLOAD_NVIDIA:-false}" != "false" ]; then - run_build "nvidia_$name" "${GCC_CXX:?}" ACC "$cxx -DCXX_EXTRA_FLAGS=-foffload=nvptx-none" - run_build "nvidia_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH" + run_build "nvidia_$name" "${GCC_CXX:?}" acc "$cxx -DCXX_EXTRA_FLAGS=-foffload=nvptx-none" + run_build "nvidia_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH" fi - run_build $name "${GCC_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH" - run_build $name "${GCC_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED" - run_build $name "${GCC_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT" - # run_build $name "${CC_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_CUDA=ON" - run_build "cuda_$name" "${GCC_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" - run_build $name "${GCC_CXX:?}" OCL "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" - run_build $name "${GCC_CXX:?}" RAJA "$cxx 
-DRAJA_IN_TREE=${RAJA_SRC:?}" + run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH" + run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED" + run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT" + # run_build $name "${CC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_CUDA=ON" + run_build "cuda_$name" "${GCC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" + run_build $name "${GCC_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" + run_build $name "${GCC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" # FIXME fails due to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100102 # FIXME we also got https://github.com/NVIDIA/nccl/issues/494 -# run_build "cuda_$name" "${GCC_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} \ +# run_build "cuda_$name" "${GCC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} \ # -DENABLE_CUDA=ON \ # -DTARGET=NVIDIA \ # -DCUDA_TOOLKIT_ROOT_DIR=${NVHPC_CUDA_DIR:?} \ @@ -171,46 +171,46 @@ build_gcc() { build_clang() { local name="clang_build" local cxx="-DCMAKE_CXX_COMPILER=${CLANG_CXX:?}" - run_build $name "${CLANG_CXX:?}" OMP "$cxx" + run_build $name "${CLANG_CXX:?}" omp "$cxx" if [ "${CLANG_OMP_OFFLOAD_AMD:-false}" != "false" ]; then - run_build "amd_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=AMD:$AMD_ARCH" + run_build "amd_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=AMD:$AMD_ARCH" fi if [ "${CLANG_OMP_OFFLOAD_NVIDIA:-false}" != "false" ]; then - run_build "nvidia_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH" + run_build "nvidia_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH" fi - run_build $name "${CLANG_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH" - run_build $name "${CLANG_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH 
-DMEM=MANAGED" - run_build $name "${CLANG_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT" - run_build $name "${CLANG_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" - run_build $name "${CLANG_CXX:?}" OCL "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" - run_build $name "${CLANG_CXX:?}" STD "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" - # run_build $name "${LANG_CXX:?}" STD20 "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" # not yet supported - run_build $name "${CLANG_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" + run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH" + run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED" + run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT" + run_build $name "${CLANG_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" + run_build $name "${CLANG_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" + run_build $name "${CLANG_CXX:?}" std "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" + # run_build $name "${LANG_CXX:?}" std20 "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" # not yet supported + run_build $name "${CLANG_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" # no clang /w RAJA+cuda because it needs nvcc which needs gcc } build_nvhpc() { local name="nvhpc_build" local cxx="-DCMAKE_CXX_COMPILER=${NVHPC_NVCXX:?}" - run_build $name "${NVHPC_NVCXX:?}" STD "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY" - run_build $name "${NVHPC_NVCXX:?}" ACC "$cxx -DTARGET_DEVICE=gpu -DTARGET_PROCESSOR=px -DCUDA_ARCH=$NV_ARCH_CCXY" - run_build $name "${NVHPC_NVCXX:?}" ACC "$cxx -DTARGET_DEVICE=multicore -DTARGET_PROCESSOR=zen" + run_build $name "${NVHPC_NVCXX:?}" std "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY" + run_build $name "${NVHPC_NVCXX:?}" acc "$cxx 
-DTARGET_DEVICE=gpu -DTARGET_PROCESSOR=px -DCUDA_ARCH=$NV_ARCH_CCXY" + run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=multicore -DTARGET_PROCESSOR=zen" } build_aocc() { - run_build aocc_build "${AOCC_CXX:?}" OMP "-DCMAKE_CXX_COMPILER=${AOCC_CXX:?}" + run_build aocc_build "${AOCC_CXX:?}" omp "-DCMAKE_CXX_COMPILER=${AOCC_CXX:?}" } build_aomp() { - run_build aomp_amd_build "${AOMP_CXX:?}" OMP "-DCMAKE_CXX_COMPILER=${AOMP_CXX:?} -DOFFLOAD=AMD:gfx906" + run_build aomp_amd_build "${AOMP_CXX:?}" omp "-DCMAKE_CXX_COMPILER=${AOMP_CXX:?} -DOFFLOAD=AMD:gfx906" #run_build aomp_nvidia_build "-DCMAKE_CXX_COMPILER=${AOMP_CXX:?} -DOFFLOAD=NVIDIA:$NV_ARCH" } build_hip() { - run_build hip_build "${HIP_CXX:?}" HIP "-DCMAKE_CXX_COMPILER=${HIP_CXX:?}" + run_build hip_build "${HIP_CXX:?}" hip "-DCMAKE_CXX_COMPILER=${HIP_CXX:?}" } build_icpx() { @@ -218,7 +218,7 @@ build_icpx() { set +u source /opt/intel/oneapi/setvars.sh -force || true set -u - run_build intel_build "${ICPX_CXX:?}" OMP "-DCMAKE_CXX_COMPILER=${ICPX_CXX:?} -DOFFLOAD=INTEL" + run_build intel_build "${ICPX_CXX:?}" omp "-DCMAKE_CXX_COMPILER=${ICPX_CXX:?} -DOFFLOAD=INTEL" } build_icpc() { @@ -228,31 +228,31 @@ build_icpc() { set -u local name="intel_build" local cxx="-DCMAKE_CXX_COMPILER=${ICPC_CXX:?}" - run_build $name "${ICPC_CXX:?}" OMP "$cxx" - run_build $name "${ICPC_CXX:?}" OCL "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" - run_build $name "${ICPC_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" - run_build $name "${ICPC_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" + run_build $name "${ICPC_CXX:?}" omp "$cxx" + run_build $name "${ICPC_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}" + run_build $name "${ICPC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}" + run_build $name "${ICPC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON" } build_computecpp() { - run_build computecpp_build "compute++" SYCL "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \ + run_build 
computecpp_build "compute++" sycl "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \ -DSYCL_COMPILER=COMPUTECPP \ -DSYCL_COMPILER_DIR=${COMPUTECPP_DIR:?} \ -DOpenCL_LIBRARY=${OCL_LIB:?}" } build_dpcpp() { - run_build intel_build "${DPCPP_DIR:?}" SYCL "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \ + run_build intel_build "${DPCPP_DIR:?}" sycl "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \ -DSYCL_COMPILER=DPCPP \ -DSYCL_COMPILER_DIR=${DPCPP_DIR:?}" # for oneAPI BaseKit: # source /opt/intel/oneapi/setvars.sh -force - # run_build intel_build "dpcpp" SYCL "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} -DSYCL_COMPILER=ONEAPI-DPCPP" + # run_build intel_build "dpcpp" sycl "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} -DSYCL_COMPILER=ONEAPI-DPCPP" } build_hipsycl() { - run_build hipsycl_build "syclcc" SYCL " + run_build hipsycl_build "syclcc" sycl " -DSYCL_COMPILER=HIPSYCL \ -DSYCL_COMPILER_DIR=${HIPSYCL_DIR:?}" } diff --git a/cpp/cmake/Modules/ComputeCppCompilerChecks.cmake b/src/cmake/Modules/ComputeCppCompilerChecks.cmake similarity index 100% rename from cpp/cmake/Modules/ComputeCppCompilerChecks.cmake rename to src/cmake/Modules/ComputeCppCompilerChecks.cmake diff --git a/cpp/cmake/Modules/ComputeCppIRMap.cmake b/src/cmake/Modules/ComputeCppIRMap.cmake similarity index 100% rename from cpp/cmake/Modules/ComputeCppIRMap.cmake rename to src/cmake/Modules/ComputeCppIRMap.cmake diff --git a/cpp/cmake/Modules/FindComputeCpp.cmake b/src/cmake/Modules/FindComputeCpp.cmake similarity index 100% rename from cpp/cmake/Modules/FindComputeCpp.cmake rename to src/cmake/Modules/FindComputeCpp.cmake diff --git a/cpp/cmake/toolchains/arm-gcc-poky.cmake b/src/cmake/toolchains/arm-gcc-poky.cmake similarity index 100% rename from cpp/cmake/toolchains/arm-gcc-poky.cmake rename to src/cmake/toolchains/arm-gcc-poky.cmake diff --git a/cpp/cmake/toolchains/gcc-generic.cmake b/src/cmake/toolchains/gcc-generic.cmake similarity index 100% rename from cpp/cmake/toolchains/gcc-generic.cmake rename to src/cmake/toolchains/gcc-generic.cmake diff --git 
a/cpp/CUDAStream.cu b/src/cuda/CUDAStream.cu similarity index 100% rename from cpp/CUDAStream.cu rename to src/cuda/CUDAStream.cu diff --git a/cpp/CUDAStream.h b/src/cuda/CUDAStream.h similarity index 100% rename from cpp/CUDAStream.h rename to src/cuda/CUDAStream.h diff --git a/cpp/CUDA.make b/src/cuda/Makefile similarity index 88% rename from cpp/CUDA.make rename to src/cuda/Makefile index 90aa77c..153f07d 100644 --- a/cpp/CUDA.make +++ b/src/cuda/Makefile @@ -31,8 +31,8 @@ MEM_PAGEFAULT= -DPAGEFAULT MEM_MODE = $(MEM_$(MEM)) -cuda-stream: main.cpp CUDAStream.cu - $(CUDA_CXX) -std=c++11 $(CXXFLAGS) -arch=$(NVARCH) $(MEM_MODE) -DCUDA $^ $(EXTRA_FLAGS) -o $@ +cuda-stream: ../main.cpp CUDAStream.cu + $(CUDA_CXX) -std=c++11 $(CXXFLAGS) -arch=$(NVARCH) $(MEM_MODE) -DCUDA $^ $(EXTRA_FLAGS) -o $@ -I. -I.. .PHONY: clean clean: diff --git a/cpp/CUDA.cmake b/src/cuda/model.cmake similarity index 100% rename from cpp/CUDA.cmake rename to src/cuda/model.cmake diff --git a/cpp/HIPStream.cpp b/src/hip/HIPStream.cpp similarity index 100% rename from cpp/HIPStream.cpp rename to src/hip/HIPStream.cpp diff --git a/cpp/HIPStream.h b/src/hip/HIPStream.h similarity index 100% rename from cpp/HIPStream.h rename to src/hip/HIPStream.h diff --git a/cpp/HIP.make b/src/hip/Makefile similarity index 76% rename from cpp/HIP.make rename to src/hip/Makefile index 7a1196f..21383b0 100644 --- a/cpp/HIP.make +++ b/src/hip/Makefile @@ -2,8 +2,8 @@ HIP_PATH?= /opt/rocm/hip HIPCC=$(HIP_PATH)/bin/hipcc -hip-stream: main.cpp HIPStream.cpp - $(HIPCC) $(CXXFLAGS) -O3 -std=c++11 -DHIP $^ $(EXTRA_FLAGS) -o $@ +hip-stream: ../main.cpp HIPStream.cpp + $(HIPCC) $(CXXFLAGS) -O3 -std=c++11 -DHIP $^ $(EXTRA_FLAGS) -o $@ -I. -I.. 
.PHONY: clean clean: diff --git a/cpp/HIP.cmake b/src/hip/model.cmake similarity index 100% rename from cpp/HIP.cmake rename to src/hip/model.cmake diff --git a/cpp/KokkosStream.cpp b/src/kokkos/KokkosStream.cpp similarity index 100% rename from cpp/KokkosStream.cpp rename to src/kokkos/KokkosStream.cpp diff --git a/cpp/KokkosStream.hpp b/src/kokkos/KokkosStream.hpp similarity index 100% rename from cpp/KokkosStream.hpp rename to src/kokkos/KokkosStream.hpp diff --git a/cpp/Kokkos.make b/src/kokkos/Makefile similarity index 85% rename from cpp/Kokkos.make rename to src/kokkos/Makefile index 7dd6af8..98d8597 100644 --- a/cpp/Kokkos.make +++ b/src/kokkos/Makefile @@ -71,7 +71,7 @@ CXX = $(NVCC_WRAPPER) endif endif -OBJ = main.o KokkosStream.o +OBJ = KokkosStream.o CXXFLAGS = -O3 LINKFLAGS = # empty for now @@ -85,14 +85,15 @@ endif endif include $(KOKKOS_PATH)/Makefile.kokkos +HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp) -kokkos-stream: $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(CXX) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -DKOKKOS -o $@ +kokkos-stream: ../main.cpp $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(CXX) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -DKOKKOS -o $@ -I. -I.. 
-%.o: %.cpp $(KOKKOS_CPP_DEPENDS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -DKOKKOS -c $< +%.o: %.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) .PHONY: clean clean: - rm -f kokkos-stream main.o KokkosStream.o Kokkos_*.o + rm -f kokkos-stream main.o KokkosStream.o Kokkos_*.o KokkosCore_* diff --git a/cpp/KOKKOS.cmake b/src/kokkos/model.cmake similarity index 100% rename from cpp/KOKKOS.cmake rename to src/kokkos/model.cmake diff --git a/cpp/main.cpp b/src/main.cpp similarity index 100% rename from cpp/main.cpp rename to src/main.cpp diff --git a/cpp/OpenCL.make b/src/ocl/Makefile similarity index 85% rename from cpp/OpenCL.make rename to src/ocl/Makefile index 8ad7108..20cd257 100644 --- a/cpp/OpenCL.make +++ b/src/ocl/Makefile @@ -30,8 +30,8 @@ else LIBS = -lOpenCL endif -ocl-stream: main.cpp OCLStream.cpp - $(CXX) $(CXXFLAGS) -DOCL $^ $(EXTRA_FLAGS) $(LIBS) -o $@ +ocl-stream: ../main.cpp OCLStream.cpp + $(CXX) $(CXXFLAGS) -DOCL $^ $(EXTRA_FLAGS) $(LIBS) -o $@ -I. -I.. .PHONY: clean clean: diff --git a/cpp/OCLStream.cpp b/src/ocl/OCLStream.cpp similarity index 100% rename from cpp/OCLStream.cpp rename to src/ocl/OCLStream.cpp diff --git a/cpp/OCLStream.h b/src/ocl/OCLStream.h similarity index 100% rename from cpp/OCLStream.h rename to src/ocl/OCLStream.h diff --git a/cpp/OCL.cmake b/src/ocl/model.cmake similarity index 100% rename from cpp/OCL.cmake rename to src/ocl/model.cmake diff --git a/cpp/OpenMP.make b/src/omp/Makefile similarity index 95% rename from cpp/OpenMP.make rename to src/omp/Makefile index dde3f75..2d8545c 100644 --- a/cpp/OpenMP.make +++ b/src/omp/Makefile @@ -91,8 +91,8 @@ endif OMP = $(OMP_$(COMPILER)_$(TARGET)) -omp-stream: main.cpp OMPStream.cpp - $(CXX) $(CXXFLAGS) -DOMP $^ $(OMP) $(EXTRA_FLAGS) -o $@ +omp-stream: ../main.cpp OMPStream.cpp + $(CXX) $(CXXFLAGS) -DOMP $^ $(OMP) $(EXTRA_FLAGS) -o $@ -I. -I.. 
.PHONY: clean clean: diff --git a/cpp/OMPStream.cpp b/src/omp/OMPStream.cpp similarity index 100% rename from cpp/OMPStream.cpp rename to src/omp/OMPStream.cpp diff --git a/cpp/OMPStream.h b/src/omp/OMPStream.h similarity index 100% rename from cpp/OMPStream.h rename to src/omp/OMPStream.h diff --git a/cpp/OMP.cmake b/src/omp/model.cmake similarity index 100% rename from cpp/OMP.cmake rename to src/omp/model.cmake diff --git a/cpp/RAJA.make b/src/raja/Makefile similarity index 92% rename from cpp/RAJA.make rename to src/raja/Makefile index 47aeefb..60f2319 100644 --- a/cpp/RAJA.make +++ b/src/raja/Makefile @@ -49,8 +49,8 @@ endif CXXFLAGS = --expt-extended-lambda -O3 -std=c++11 -x cu -Xcompiler -fopenmp -arch $(ARCH) endif -raja-stream: main.cpp RAJAStream.cpp - $(CXX) $(CXXFLAGS) -DUSE_RAJA -I$(RAJA_PATH)/include $^ $(EXTRA_FLAGS) -L$(RAJA_PATH)/lib -lRAJA -o $@ +raja-stream: ../main.cpp RAJAStream.cpp + $(CXX) $(CXXFLAGS) -DUSE_RAJA -I$(RAJA_PATH)/include $^ $(EXTRA_FLAGS) -L$(RAJA_PATH)/lib -lRAJA -o $@ -I. -I.. 
.PHONY: clean clean: diff --git a/cpp/RAJAStream.cpp b/src/raja/RAJAStream.cpp similarity index 100% rename from cpp/RAJAStream.cpp rename to src/raja/RAJAStream.cpp diff --git a/cpp/RAJAStream.hpp b/src/raja/RAJAStream.hpp similarity index 100% rename from cpp/RAJAStream.hpp rename to src/raja/RAJAStream.hpp diff --git a/cpp/RAJA.cmake b/src/raja/model.cmake similarity index 100% rename from cpp/RAJA.cmake rename to src/raja/model.cmake diff --git a/cpp/register_models.cmake b/src/register_models.cmake similarity index 93% rename from cpp/register_models.cmake rename to src/register_models.cmake index 82e7243..247612a 100644 --- a/cpp/register_models.cmake +++ b/src/register_models.cmake @@ -118,22 +118,24 @@ endfunction() macro(register_model NAME PREPROCESSOR_NAME) - string(TOUPPER ${NAME} MODEL_UPPER) list(APPEND REGISTERED_MODELS "${NAME}") - list(APPEND IMPL_${MODEL_UPPER}_SOURCES "${ARGN}") + string(TOUPPER ${NAME} MODEL_UPPER) + list(APPEND IMPL_${MODEL_UPPER}_SOURCES "${NAME}/${ARGN}") list(APPEND IMPL_${MODEL_UPPER}_DEFINITIONS "${PREPROCESSOR_NAME}") endmacro() macro(load_model MODEL) - string(TOUPPER "${MODEL}" MODEL_UPPER) - if ("${MODEL_UPPER}" IN_LIST REGISTERED_MODELS) - set(MODEL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${MODEL_UPPER}.cmake) + if ("${MODEL}" IN_LIST REGISTERED_MODELS) + string(TOLOWER "${MODEL}" MODEL_LOWER) + set(MODEL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${MODEL_LOWER}/model.cmake) + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/${MODEL_LOWER}) if (NOT EXISTS ${MODEL_FILE}) message(FATAL_ERROR "${MODEL_FILE} not found, perhaps it needs to be implemented?") endif () include(${MODEL_FILE}) + string(TOUPPER "${MODEL}" MODEL_UPPER) list(APPEND IMPL_SOURCES ${IMPL_${MODEL_UPPER}_SOURCES}) list(APPEND IMPL_DEFINITIONS ${IMPL_${MODEL_UPPER}_DEFINITIONS}) diff --git a/cpp/STD.make b/src/std/Makefile similarity index 72% rename from cpp/STD.make rename to src/std/Makefile index 3225a08..a5a8847 100644 --- a/cpp/STD.make +++ b/src/std/Makefile @@ 
-6,8 +6,8 @@ CXXFLAGS=-O3 -std=c++17 -stdpar -DSTD STD_CXX=nvc++ -std-stream: main.cpp STDStream.cpp - $(STD_CXX) $(CXXFLAGS) $^ $(EXTRA_FLAGS) -o $@ +std-stream: ../main.cpp STDStream.cpp + $(STD_CXX) $(CXXFLAGS) $^ $(EXTRA_FLAGS) -o $@ -I. -I.. .PHONY: clean clean: diff --git a/cpp/STDStream.cpp b/src/std/STDStream.cpp similarity index 100% rename from cpp/STDStream.cpp rename to src/std/STDStream.cpp diff --git a/cpp/STDStream.h b/src/std/STDStream.h similarity index 100% rename from cpp/STDStream.h rename to src/std/STDStream.h diff --git a/cpp/STD.cmake b/src/std/model.cmake similarity index 100% rename from cpp/STD.cmake rename to src/std/model.cmake diff --git a/cpp/STD20.make b/src/std20/Makefile similarity index 76% rename from cpp/STD20.make rename to src/std20/Makefile index eced9f7..3a93bcb 100644 --- a/cpp/STD20.make +++ b/src/std20/Makefile @@ -17,8 +17,8 @@ FLAGS_GNU = -O3 -std=c++2a -march=native CXXFLAGS = $(FLAGS_$(COMPILER)) -std20-stream: main.cpp STD20Stream.cpp - $(CXX) -DSTD20 $(CXXFLAGS) $^ $(EXTRA_FLAGS) -o $@ +std20-stream: ../main.cpp STD20Stream.cpp + $(CXX) -DSTD20 $(CXXFLAGS) $^ $(EXTRA_FLAGS) -o $@ -I. -I.. 
.PHONY: clean clean: diff --git a/cpp/STD20Stream.cpp b/src/std20/STD20Stream.cpp similarity index 100% rename from cpp/STD20Stream.cpp rename to src/std20/STD20Stream.cpp diff --git a/cpp/STD20Stream.hpp b/src/std20/STD20Stream.hpp similarity index 100% rename from cpp/STD20Stream.hpp rename to src/std20/STD20Stream.hpp diff --git a/cpp/STD20.cmake b/src/std20/model.cmake similarity index 100% rename from cpp/STD20.cmake rename to src/std20/model.cmake diff --git a/cpp/SYCL.make b/src/sycl/Makefile similarity index 96% rename from cpp/SYCL.make rename to src/sycl/Makefile index 58df8d0..05d2022 100644 --- a/cpp/SYCL.make +++ b/src/sycl/Makefile @@ -73,8 +73,8 @@ SYCL_LINK_FLAGS = $(SYCL_$(COMPILER)_LINK_FLAGS) SYCL_INCLUDE = $(SYCL_$(COMPILER)_INCLUDE) # only ComputeCpp generates .sycl files which is a bit odd to deal with so we opted to compile everything together -sycl-stream: main.cpp SYCLStream.cpp - $(SYCL_SYCLCXX) $(SYCL_SYCLFLAGS) $(SYCL_FLAGS) $(SYCL_INCLUDE) -DSYCL $(EXTRA_FLAGS) $(SYCL_LINK_FLAGS) $^ -o $@ +sycl-stream: ../main.cpp SYCLStream.cpp + $(SYCL_SYCLCXX) $(SYCL_SYCLFLAGS) $(SYCL_FLAGS) $(SYCL_INCLUDE) -DSYCL $(EXTRA_FLAGS) $(SYCL_LINK_FLAGS) $^ -o $@ -I. -I.. 
.PHONY: clean clean: diff --git a/cpp/SYCLStream.cpp b/src/sycl/SYCLStream.cpp similarity index 100% rename from cpp/SYCLStream.cpp rename to src/sycl/SYCLStream.cpp diff --git a/cpp/SYCLStream.h b/src/sycl/SYCLStream.h similarity index 100% rename from cpp/SYCLStream.h rename to src/sycl/SYCLStream.h diff --git a/cpp/SYCL.cmake b/src/sycl/model.cmake similarity index 100% rename from cpp/SYCL.cmake rename to src/sycl/model.cmake From 4d00a8699e1889b01e4194f5deb311462b2f0454 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Thu, 27 May 2021 09:41:41 +0100 Subject: [PATCH 05/10] Don't point to the CL dir for SYCL --- src/sycl/model.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sycl/model.cmake b/src/sycl/model.cmake index c35f435..c0c6c3f 100644 --- a/src/sycl/model.cmake +++ b/src/sycl/model.cmake @@ -48,7 +48,7 @@ macro(setup) set(ComputeCpp_DIR ${SYCL_COMPILER_DIR}) # don't point to the CL dir as the imports already have the CL prefix - set(OpenCL_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/CL") + set(OpenCL_INCLUDE_DIR "${CMAKE_SOURCE_DIR}") register_definitions(CL_TARGET_OPENCL_VERSION=220 _GLIBCXX_USE_CXX11_ABI=0) # ComputeCpp needs OpenCL From d3b676cb37771740c57d974404df42aa213cad93 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Thu, 27 May 2021 10:47:46 +0100 Subject: [PATCH 06/10] Include CL_MEM_CHANNEL_INTEL directly to avoid header precedence issues --- src/sycl/SYCLStream.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/sycl/SYCLStream.h b/src/sycl/SYCLStream.h index f312009..dd13387 100644 --- a/src/sycl/SYCLStream.h +++ b/src/sycl/SYCLStream.h @@ -11,6 +11,15 @@ #include "Stream.h" +#include "CL/opencl.h" + +// XXX Intel's SYCL impl. 
needs CL_MEM_CHANNEL_INTEL which is provided in dpcpp's include dir +// however, depending the system configuration, the system CL header sometimes takes precedence +// we only really need this macro to refer to the extension so this is probably OK +#ifndef CL_MEM_CHANNEL_INTEL +#define CL_MEM_CHANNEL_INTEL 0x4213 +#endif + #include "CL/sycl.hpp" #define IMPLEMENTATION_STRING "SYCL" From f5fe55c204bde0888d78a77d10e01a0060af1ec3 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Tue, 30 Nov 2021 18:22:55 +0000 Subject: [PATCH 07/10] [WIP] Drop CL headers and Makefiles Update README Move new models to /src --- .github/workflows/main.yaml | 4 +- README.android | 36 - README.md | 43 +- TBB.make | 56 - legacy/Makefile | 22 - src/CL/cl.h | 1902 ----------------- src/CL/cl_d3d10.h | 117 - src/CL/cl_d3d11.h | 117 - src/CL/cl_dx9_media_sharing.h | 118 - src/CL/cl_dx9_media_sharing_intel.h | 170 -- src/CL/cl_egl.h | 120 -- src/CL/cl_ext.h | 841 -------- src/CL/cl_ext_intel.h | 682 ------ src/CL/cl_gl.h | 159 -- src/CL/cl_gl_ext.h | 40 - src/CL/cl_half.h | 440 ---- src/CL/cl_icd.h | 1287 ----------- src/CL/cl_platform.h | 1384 ------------ src/CL/cl_va_api_media_sharing_intel.h | 160 -- src/CL/cl_version.h | 81 - src/CL/opencl.h | 33 - src/acc/Makefile | 58 - src/cuda/Makefile | 40 - src/hip/Makefile | 11 - .../java/java-stream}/.gitignore | 0 .../.mvn/wrapper/maven-wrapper.jar | Bin .../.mvn/wrapper/maven-wrapper.properties | 0 .../java/java-stream}/README.md | 0 {java-stream => src/java/java-stream}/mvnw | 0 .../java/java-stream}/mvnw.cmd | 0 {java-stream => src/java/java-stream}/pom.xml | 0 .../main/java/javastream/FractionalMaths.java | 0 .../src/main/java/javastream/JavaStream.java | 0 .../src/main/java/javastream/Main.java | 0 .../javastream/aparapi/AparapiStreams.java | 0 .../aparapi/GenericAparapiStreamKernel.java | 0 .../aparapi/SpecialisedDoubleKernel.java | 0 .../aparapi/SpecialisedFloatKernel.java | 0 .../javastream/jdk/GenericPlainStream.java | 0 
.../java/javastream/jdk/GenericStream.java | 0 .../main/java/javastream/jdk/JdkStreams.java | 0 .../main/java/javastream/jdk/PlainStream.java | 0 .../jdk/SpecialisedDoubleStream.java | 0 .../jdk/SpecialisedFloatStream.java | 0 .../jdk/SpecialisedPlainDoubleStream.java | 0 .../jdk/SpecialisedPlainFloatStream.java | 0 .../tornadovm/GenericTornadoVMStream.java | 0 .../tornadovm/SpecialisedDouble.java | 0 .../tornadovm/SpecialisedFloat.java | 0 .../tornadovm/TornadoVMStreams.java | 0 .../src/test/java/javastream/SmokeTest.java | 0 .../JuliaStream.jl}/.JuliaFormatter.toml | 0 .../julia/JuliaStream.jl}/.gitignore | 0 .../JuliaStream.jl}/AMDGPU/Manifest.toml | 0 .../julia/JuliaStream.jl}/AMDGPU/Project.toml | 0 .../julia/JuliaStream.jl}/CUDA/Manifest.toml | 0 .../julia/JuliaStream.jl}/CUDA/Project.toml | 0 .../KernelAbstractions/Manifest.toml | 0 .../KernelAbstractions/Project.toml | 0 .../julia/JuliaStream.jl}/Manifest.toml | 0 .../julia/JuliaStream.jl}/Project.toml | 0 .../julia/JuliaStream.jl}/README.md | 0 .../JuliaStream.jl}/Threaded/Manifest.toml | 0 .../JuliaStream.jl}/Threaded/Project.toml | 0 .../JuliaStream.jl}/oneAPI/Manifest.toml | 0 .../julia/JuliaStream.jl}/oneAPI/Project.toml | 0 .../julia/JuliaStream.jl}/src/AMDGPUStream.jl | 0 .../julia/JuliaStream.jl}/src/CUDAStream.jl | 0 .../JuliaStream.jl}/src/DistributedStream.jl | 0 .../julia/JuliaStream.jl}/src/JuliaStream.jl | 0 .../src/KernelAbstractionsStream.jl | 0 .../julia/JuliaStream.jl}/src/PlainStream.jl | 0 .../julia/JuliaStream.jl}/src/Stream.jl | 0 .../julia/JuliaStream.jl}/src/StreamData.jl | 0 .../JuliaStream.jl}/src/ThreadedStream.jl | 0 .../julia/JuliaStream.jl}/src/oneAPIStream.jl | 0 .../julia/JuliaStream.jl}/update_all.sh | 0 src/kokkos/Makefile | 99 - {legacy => src/legacy}/HCStream.cpp | 0 {legacy => src/legacy}/HCStream.h | 0 src/{ => ocl}/CL/cl2.hpp | 0 src/ocl/Makefile | 39 - src/omp/Makefile | 103 - src/raja/Makefile | 58 - .../scala/scala-stream}/.bsp/sbt.json | 0 
.../scala/scala-stream}/.gitignore | 0 .../scala/scala-stream}/.jvmopts | 0 .../scala/scala-stream}/.scalafmt.conf | 0 .../scala/scala-stream}/README.md | 0 .../scala/scala-stream}/build.sbt | 0 .../scala-stream}/project/build.properties | 0 .../scala/scala-stream}/project/plugins.sbt | 0 .../scala/scala-stream}/reflect-config.json | 0 {scala-stream => src/scala/scala-stream}/sbt | 0 .../sbt-dist/bin/java9-rt-export.jar | Bin .../scala/scala-stream}/sbt-dist/bin/sbt | 0 .../sbt-dist/bin/sbt-launch-lib.bash | 0 .../scala-stream}/sbt-dist/bin/sbt-launch.jar | Bin .../scala/scala-stream}/sbt-dist/bin/sbt.bat | 0 .../scala-stream}/sbt-dist/conf/sbtconfig.txt | 0 .../scala/scala-stream}/sbt-dist/conf/sbtopts | 0 .../main/scala/scalastream/J8SStream.scala | 0 .../main/scala/scalastream/ParStream.scala | 0 .../main/scala/scalastream/PlainStream.scala | 0 .../main/scala/scalastream/ScalaStream.scala | 0 .../main/scala/scalastream/ThreadStream.scala | 0 src/std/Makefile | 14 - src/std20/Makefile | 26 - src/sycl/Makefile | 81 - TBB.cmake => src/tbb/TBB.cmake | 0 TBBStream.cpp => src/tbb/TBBStream.cpp | 0 TBBStream.hpp => src/tbb/TBBStream.hpp | 0 THRUST.cmake => src/thrust/THRUST.cmake | 0 ThrustStream.cu => src/thrust/ThrustStream.cu | 0 ThrustStream.h => src/thrust/ThrustStream.h | 0 115 files changed, 10 insertions(+), 8331 deletions(-) delete mode 100644 README.android delete mode 100644 TBB.make delete mode 100644 legacy/Makefile delete mode 100644 src/CL/cl.h delete mode 100644 src/CL/cl_d3d10.h delete mode 100644 src/CL/cl_d3d11.h delete mode 100644 src/CL/cl_dx9_media_sharing.h delete mode 100644 src/CL/cl_dx9_media_sharing_intel.h delete mode 100644 src/CL/cl_egl.h delete mode 100644 src/CL/cl_ext.h delete mode 100644 src/CL/cl_ext_intel.h delete mode 100644 src/CL/cl_gl.h delete mode 100644 src/CL/cl_gl_ext.h delete mode 100644 src/CL/cl_half.h delete mode 100644 src/CL/cl_icd.h delete mode 100644 src/CL/cl_platform.h delete mode 100644 
src/CL/cl_va_api_media_sharing_intel.h delete mode 100644 src/CL/cl_version.h delete mode 100644 src/CL/opencl.h delete mode 100644 src/acc/Makefile delete mode 100644 src/cuda/Makefile delete mode 100644 src/hip/Makefile rename {java-stream => src/java/java-stream}/.gitignore (100%) rename {java-stream => src/java/java-stream}/.mvn/wrapper/maven-wrapper.jar (100%) rename {java-stream => src/java/java-stream}/.mvn/wrapper/maven-wrapper.properties (100%) rename {java-stream => src/java/java-stream}/README.md (100%) rename {java-stream => src/java/java-stream}/mvnw (100%) rename {java-stream => src/java/java-stream}/mvnw.cmd (100%) rename {java-stream => src/java/java-stream}/pom.xml (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/FractionalMaths.java (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/JavaStream.java (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/Main.java (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/aparapi/AparapiStreams.java (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/aparapi/GenericAparapiStreamKernel.java (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/aparapi/SpecialisedDoubleKernel.java (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/aparapi/SpecialisedFloatKernel.java (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/jdk/GenericPlainStream.java (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/jdk/GenericStream.java (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/jdk/JdkStreams.java (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/jdk/PlainStream.java (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/jdk/SpecialisedDoubleStream.java (100%) rename {java-stream => 
src/java/java-stream}/src/main/java/javastream/jdk/SpecialisedFloatStream.java (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/jdk/SpecialisedPlainDoubleStream.java (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/jdk/SpecialisedPlainFloatStream.java (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/tornadovm/GenericTornadoVMStream.java (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/tornadovm/SpecialisedDouble.java (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/tornadovm/SpecialisedFloat.java (100%) rename {java-stream => src/java/java-stream}/src/main/java/javastream/tornadovm/TornadoVMStreams.java (100%) rename {java-stream => src/java/java-stream}/src/test/java/javastream/SmokeTest.java (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/.JuliaFormatter.toml (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/.gitignore (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/AMDGPU/Manifest.toml (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/AMDGPU/Project.toml (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/CUDA/Manifest.toml (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/CUDA/Project.toml (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/KernelAbstractions/Manifest.toml (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/KernelAbstractions/Project.toml (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/Manifest.toml (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/Project.toml (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/README.md (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/Threaded/Manifest.toml (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/Threaded/Project.toml (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/oneAPI/Manifest.toml (100%) rename {JuliaStream.jl => 
src/julia/JuliaStream.jl}/oneAPI/Project.toml (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/src/AMDGPUStream.jl (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/src/CUDAStream.jl (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/src/DistributedStream.jl (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/src/JuliaStream.jl (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/src/KernelAbstractionsStream.jl (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/src/PlainStream.jl (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/src/Stream.jl (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/src/StreamData.jl (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/src/ThreadedStream.jl (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/src/oneAPIStream.jl (100%) rename {JuliaStream.jl => src/julia/JuliaStream.jl}/update_all.sh (100%) delete mode 100644 src/kokkos/Makefile rename {legacy => src/legacy}/HCStream.cpp (100%) rename {legacy => src/legacy}/HCStream.h (100%) rename src/{ => ocl}/CL/cl2.hpp (100%) delete mode 100644 src/ocl/Makefile delete mode 100644 src/omp/Makefile delete mode 100644 src/raja/Makefile rename {scala-stream => src/scala/scala-stream}/.bsp/sbt.json (100%) rename {scala-stream => src/scala/scala-stream}/.gitignore (100%) rename {scala-stream => src/scala/scala-stream}/.jvmopts (100%) rename {scala-stream => src/scala/scala-stream}/.scalafmt.conf (100%) rename {scala-stream => src/scala/scala-stream}/README.md (100%) rename {scala-stream => src/scala/scala-stream}/build.sbt (100%) rename {scala-stream => src/scala/scala-stream}/project/build.properties (100%) rename {scala-stream => src/scala/scala-stream}/project/plugins.sbt (100%) rename {scala-stream => src/scala/scala-stream}/reflect-config.json (100%) rename {scala-stream => src/scala/scala-stream}/sbt (100%) rename {scala-stream => src/scala/scala-stream}/sbt-dist/bin/java9-rt-export.jar (100%) rename 
{scala-stream => src/scala/scala-stream}/sbt-dist/bin/sbt (100%) rename {scala-stream => src/scala/scala-stream}/sbt-dist/bin/sbt-launch-lib.bash (100%) rename {scala-stream => src/scala/scala-stream}/sbt-dist/bin/sbt-launch.jar (100%) rename {scala-stream => src/scala/scala-stream}/sbt-dist/bin/sbt.bat (100%) rename {scala-stream => src/scala/scala-stream}/sbt-dist/conf/sbtconfig.txt (100%) rename {scala-stream => src/scala/scala-stream}/sbt-dist/conf/sbtopts (100%) rename {scala-stream => src/scala/scala-stream}/src/main/scala/scalastream/J8SStream.scala (100%) rename {scala-stream => src/scala/scala-stream}/src/main/scala/scalastream/ParStream.scala (100%) rename {scala-stream => src/scala/scala-stream}/src/main/scala/scalastream/PlainStream.scala (100%) rename {scala-stream => src/scala/scala-stream}/src/main/scala/scalastream/ScalaStream.scala (100%) rename {scala-stream => src/scala/scala-stream}/src/main/scala/scalastream/ThreadStream.scala (100%) delete mode 100644 src/std/Makefile delete mode 100644 src/std20/Makefile delete mode 100644 src/sycl/Makefile rename TBB.cmake => src/tbb/TBB.cmake (100%) rename TBBStream.cpp => src/tbb/TBBStream.cpp (100%) rename TBBStream.hpp => src/tbb/TBBStream.hpp (100%) rename THRUST.cmake => src/thrust/THRUST.cmake (100%) rename ThrustStream.cu => src/thrust/ThrustStream.cu (100%) rename ThrustStream.h => src/thrust/ThrustStream.h (100%) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index b7cc493..423064a 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-18.04 defaults: run: - working-directory: ./java-stream + working-directory: ./src/java/java-stream steps: - uses: actions/checkout@v2 - name: Test build project @@ -21,7 +21,7 @@ jobs: runs-on: ubuntu-18.04 defaults: run: - working-directory: ./JuliaStream.jl + working-directory: ./src/julia/JuliaStream.jl steps: - uses: actions/checkout@v2 - name: Setup project diff --git 
a/README.android b/README.android deleted file mode 100644 index edc4a52..0000000 --- a/README.android +++ /dev/null @@ -1,36 +0,0 @@ -Android (outdated instructions) ------------------- - -Assuming you have a recent Android NDK available, you can use the -toolchain that it provides to build GPU-STREAM. You should first -use the NDK to generate a standalone toolchain: - - # Select a directory to install the toolchain to - ANDROID_NATIVE_TOOLCHAIN=/path/to/toolchain - - ${NDK}/build/tools/make-standalone-toolchain.sh \ - --platform=android-14 \ - --toolchain=arm-linux-androideabi-4.8 \ - --install-dir=${ANDROID_NATIVE_TOOLCHAIN} - -Make sure that the OpenCL headers and library (libOpenCL.so) are -available in `${ANDROID_NATIVE_TOOLCHAIN}/sysroot/usr/`. - -You should then be able to build GPU-STREAM: - - make CXX=${ANDROID_NATIVE_TOOLCHAIN}/bin/arm-linux-androideabi-g++ - -Copy the executable and OpenCL kernels to the device: - - adb push gpu-stream-ocl /data/local/tmp - adb push ocl-stream-kernels.cl /data/local/tmp - -Run GPU-STREAM from an adb shell: - - adb shell - cd /data/local/tmp - - # Use float if device doesn't support double, and reduce array size - ./gpu-stream-ocl --float -n 6 -s 10000000 - - diff --git a/README.md b/README.md index 598b4cd..d36da1a 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ There are multiple implementations of this benchmark in a variety of programming Currently implemented are: - OpenCL - CUDA + - HIP - OpenACC - OpenMP 3 and 4.5 - C++ Parallel STL @@ -20,11 +21,14 @@ Currently implemented are: - RAJA - SYCL - TBB + - Thrust (via CUDA or HIP) This code was previously called GPU-STREAM. This project also contains implementations in alternative languages with different build systems: -* Scala - [scala-stream](./scala-stream) +* Julia - [JuliaStream.jl](./src/julia/JuliaStream.jl) +* Java - [java-stream](./src/java/java-stream) +* Scala - [scala-stream](./src/scala/scala-stream) How is this different to STREAM? 
-------------------------------- @@ -109,41 +113,10 @@ Alternatively, refer to the [CI script](./ci-test-compile.sh), which test-compil ### GNU Make -We have supplied a series of Makefiles, one for each programming model, to assist with building. -The Makefiles contain common build options, and should be simple to customise for your needs too. +Support for Make has been removed from 4.0 onwards. +However, as the build process only involves a few source files, the required compile commands can be extracted from the CI output. -General usage is `make -C src/` -Common compiler flags and names can be set by passing a `COMPILER` option to Make, e.g. `make COMPILER=GNU`. -Some models allow specifying a CPU or GPU style target, and this can be set by passing a `TARGET` option to Make, e.g. `make TARGET=GPU`. - -Pass in extra flags via the `EXTRA_FLAGS` option. - -The binaries are named in the form `-stream`. - -#### Building Kokkos for Make - -Kokkos version >= 3 requires setting the `KOKKOS_PATH` flag to the *source* directory of a distribution. -For example: - -``` -cd -wget https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz -tar -xvf 3.1.01.tar.gz # should end up with ~/kokkos-3.1.01 -cd BabelStream -make -C src/kokkos KOKKOS_PATH=~/kokkos-3.1.01 -``` -See make output for more information on supported flags. - -#### Building RAJA for Make - -We use the following command to build RAJA using the Intel Compiler. -``` -cmake .. -DCMAKE_INSTALL_PREFIX= -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_COMPILER=icpc -DRAJA_PTR="RAJA_USE_RESTRICT_ALIGNED_PTR" -DCMAKE_BUILD_TYPE=ICCBuild -DRAJA_ENABLE_TESTS=Off -``` -For building with CUDA support, we use the following command. -``` -cmake .. 
-DCMAKE_INSTALL_PREFIX= -DRAJA_PTR="RAJA_USE_RESTRICT_ALIGNED_PTR" -DRAJA_ENABLE_CUDA=1 -DRAJA_ENABLE_TESTS=Off -``` + Results ------- diff --git a/TBB.make b/TBB.make deleted file mode 100644 index c224a5a..0000000 --- a/TBB.make +++ /dev/null @@ -1,56 +0,0 @@ - -ifndef COMPILER -define compiler_help -Set COMPILER to change flags (defaulting to GNU). -Available compilers are: - GNU INTEL INTEL_LEGACY - -endef -$(info $(compiler_help)) -COMPILER=GNU -endif - - -CXX_GNU = g++ -CXX_INTEL = icpx -CXX_INTEL_LEGACY = icpc -CXX = $(COMPILER_$(COMPILER)) - -CXXFLAGS_GNU = -march=native -CXXFLAGS_INTEL = -march=native -CXXFLAGS_INTEL_LEGACY = -qopt-streaming-stores=always - -CXX = $(CXX_$(COMPILER)) -CXXFLAGS = -std=c++11 -O3 $(CXXFLAGS_$(COMPILER)) - - - -ifndef PARTITIONER -define partitioner_help -Set PARTITIONER to select TBB's partitioner. -Partitioner specifies how a loop template should partition its work among threads. - -Available options: - AUTO - Optimize range subdivision based on work-stealing events. - AFFINITY - Proportional splitting that optimizes for cache affinity. - STATIC - Distribute work uniformly with no additional load balancing. - SIMPLE - Recursively split its range until it cannot be further subdivided. - -See https://spec.oneapi.com/versions/latest/elements/oneTBB/source/algorithms.html#partitioners -for more details. 
- -endef -$(info $(partitioner_help)) -PARTITIONER=AUTO -endif - -PARTITIONER_MODE = -DPARTITIONER_$(PARTITIONER) - - -tbb-stream: main.cpp TBBStream.cpp - $(CXX) -DTBB $(PARTITIONER_MODE) $(CXXFLAGS) $^ $(EXTRA_FLAGS) -I$(TBB_DIR)/include -Wl,-rpath,$(TBB_DIR)/lib/intel64/gcc4.8 $(TBB_DIR)/lib/intel64/gcc4.8/libtbb.so -o $@ - -.PHONY: clean -clean: - rm -f tbb-stream - diff --git a/legacy/Makefile b/legacy/Makefile deleted file mode 100644 index 8047ae1..0000000 --- a/legacy/Makefile +++ /dev/null @@ -1,22 +0,0 @@ - -HCC = hcc - -CXXFLAGS+=-O3 $(shell hcc-config --cxxflags) -LDFLAGS+=$(shell hcc-config --ldflags) - -ifdef TBSIZE -CXXFLAGS+=-DVIRTUALTILESIZE=$(TBSIZE) -endif - -ifdef NTILES -CXXFLAGS+=-DNTILES=$(TBSIZE) -endif - - -hc-stream: ../main.cpp HCStream.cpp - $(HCC) $(CXXFLAGS) -DHC $^ $(LDFLAGS) $(EXTRA_FLAGS) -o $@ -I. -I.. - - -.PHONY: clean -clean: - rm -f hc-stream diff --git a/src/CL/cl.h b/src/CL/cl.h deleted file mode 100644 index f33f999..0000000 --- a/src/CL/cl.h +++ /dev/null @@ -1,1902 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- ******************************************************************************/ - -#ifndef __OPENCL_CL_H -#define __OPENCL_CL_H - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/******************************************************************************/ - -typedef struct _cl_platform_id * cl_platform_id; -typedef struct _cl_device_id * cl_device_id; -typedef struct _cl_context * cl_context; -typedef struct _cl_command_queue * cl_command_queue; -typedef struct _cl_mem * cl_mem; -typedef struct _cl_program * cl_program; -typedef struct _cl_kernel * cl_kernel; -typedef struct _cl_event * cl_event; -typedef struct _cl_sampler * cl_sampler; - -typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ -typedef cl_ulong cl_bitfield; -typedef cl_bitfield cl_device_type; -typedef cl_uint cl_platform_info; -typedef cl_uint cl_device_info; -typedef cl_bitfield cl_device_fp_config; -typedef cl_uint cl_device_mem_cache_type; -typedef cl_uint cl_device_local_mem_type; -typedef cl_bitfield cl_device_exec_capabilities; -#ifdef CL_VERSION_2_0 -typedef cl_bitfield cl_device_svm_capabilities; -#endif -typedef cl_bitfield cl_command_queue_properties; -#ifdef CL_VERSION_1_2 -typedef intptr_t cl_device_partition_property; -typedef cl_bitfield cl_device_affinity_domain; -#endif - -typedef intptr_t cl_context_properties; -typedef cl_uint cl_context_info; -#ifdef CL_VERSION_2_0 -typedef cl_bitfield cl_queue_properties; -#endif -typedef cl_uint cl_command_queue_info; -typedef cl_uint cl_channel_order; -typedef cl_uint cl_channel_type; -typedef cl_bitfield cl_mem_flags; -#ifdef CL_VERSION_2_0 -typedef cl_bitfield cl_svm_mem_flags; -#endif -typedef cl_uint cl_mem_object_type; -typedef cl_uint cl_mem_info; -#ifdef CL_VERSION_1_2 -typedef cl_bitfield cl_mem_migration_flags; -#endif -typedef cl_uint cl_image_info; -#ifdef CL_VERSION_1_1 -typedef cl_uint cl_buffer_create_type; -#endif 
-typedef cl_uint cl_addressing_mode; -typedef cl_uint cl_filter_mode; -typedef cl_uint cl_sampler_info; -typedef cl_bitfield cl_map_flags; -#ifdef CL_VERSION_2_0 -typedef intptr_t cl_pipe_properties; -typedef cl_uint cl_pipe_info; -#endif -typedef cl_uint cl_program_info; -typedef cl_uint cl_program_build_info; -#ifdef CL_VERSION_1_2 -typedef cl_uint cl_program_binary_type; -#endif -typedef cl_int cl_build_status; -typedef cl_uint cl_kernel_info; -#ifdef CL_VERSION_1_2 -typedef cl_uint cl_kernel_arg_info; -typedef cl_uint cl_kernel_arg_address_qualifier; -typedef cl_uint cl_kernel_arg_access_qualifier; -typedef cl_bitfield cl_kernel_arg_type_qualifier; -#endif -typedef cl_uint cl_kernel_work_group_info; -#ifdef CL_VERSION_2_1 -typedef cl_uint cl_kernel_sub_group_info; -#endif -typedef cl_uint cl_event_info; -typedef cl_uint cl_command_type; -typedef cl_uint cl_profiling_info; -#ifdef CL_VERSION_2_0 -typedef cl_bitfield cl_sampler_properties; -typedef cl_uint cl_kernel_exec_info; -#endif -#ifdef CL_VERSION_3_0 -typedef cl_bitfield cl_device_atomic_capabilities; -typedef cl_uint cl_khronos_vendor_id; -typedef cl_bitfield cl_mem_properties; -typedef cl_uint cl_version; -#endif - -typedef struct _cl_image_format { - cl_channel_order image_channel_order; - cl_channel_type image_channel_data_type; -} cl_image_format; - -#ifdef CL_VERSION_1_2 - -typedef struct _cl_image_desc { - cl_mem_object_type image_type; - size_t image_width; - size_t image_height; - size_t image_depth; - size_t image_array_size; - size_t image_row_pitch; - size_t image_slice_pitch; - cl_uint num_mip_levels; - cl_uint num_samples; -#ifdef CL_VERSION_2_0 -#ifdef __GNUC__ - __extension__ /* Prevents warnings about anonymous union in -pedantic builds */ -#endif -#ifdef _MSC_VER -#pragma warning( push ) -#pragma warning( disable : 4201 ) /* Prevents warning about nameless struct/union in /W4 /Za builds */ -#endif - union { -#endif - cl_mem buffer; -#ifdef CL_VERSION_2_0 - cl_mem mem_object; - }; -#ifdef 
_MSC_VER -#pragma warning( pop ) -#endif -#endif -} cl_image_desc; - -#endif - -#ifdef CL_VERSION_1_1 - -typedef struct _cl_buffer_region { - size_t origin; - size_t size; -} cl_buffer_region; - -#endif - -#ifdef CL_VERSION_3_0 - -#define CL_NAME_VERSION_MAX_NAME_SIZE 64 - -typedef struct _cl_name_version { - cl_version version; - char name[CL_NAME_VERSION_MAX_NAME_SIZE]; -} cl_name_version; - -#endif - -/******************************************************************************/ - -/* Error Codes */ -#define CL_SUCCESS 0 -#define CL_DEVICE_NOT_FOUND -1 -#define CL_DEVICE_NOT_AVAILABLE -2 -#define CL_COMPILER_NOT_AVAILABLE -3 -#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 -#define CL_OUT_OF_RESOURCES -5 -#define CL_OUT_OF_HOST_MEMORY -6 -#define CL_PROFILING_INFO_NOT_AVAILABLE -7 -#define CL_MEM_COPY_OVERLAP -8 -#define CL_IMAGE_FORMAT_MISMATCH -9 -#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 -#define CL_BUILD_PROGRAM_FAILURE -11 -#define CL_MAP_FAILURE -12 -#ifdef CL_VERSION_1_1 -#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 -#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 -#endif -#ifdef CL_VERSION_1_2 -#define CL_COMPILE_PROGRAM_FAILURE -15 -#define CL_LINKER_NOT_AVAILABLE -16 -#define CL_LINK_PROGRAM_FAILURE -17 -#define CL_DEVICE_PARTITION_FAILED -18 -#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 -#endif - -#define CL_INVALID_VALUE -30 -#define CL_INVALID_DEVICE_TYPE -31 -#define CL_INVALID_PLATFORM -32 -#define CL_INVALID_DEVICE -33 -#define CL_INVALID_CONTEXT -34 -#define CL_INVALID_QUEUE_PROPERTIES -35 -#define CL_INVALID_COMMAND_QUEUE -36 -#define CL_INVALID_HOST_PTR -37 -#define CL_INVALID_MEM_OBJECT -38 -#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 -#define CL_INVALID_IMAGE_SIZE -40 -#define CL_INVALID_SAMPLER -41 -#define CL_INVALID_BINARY -42 -#define CL_INVALID_BUILD_OPTIONS -43 -#define CL_INVALID_PROGRAM -44 -#define CL_INVALID_PROGRAM_EXECUTABLE -45 -#define CL_INVALID_KERNEL_NAME -46 -#define CL_INVALID_KERNEL_DEFINITION -47 -#define 
CL_INVALID_KERNEL -48 -#define CL_INVALID_ARG_INDEX -49 -#define CL_INVALID_ARG_VALUE -50 -#define CL_INVALID_ARG_SIZE -51 -#define CL_INVALID_KERNEL_ARGS -52 -#define CL_INVALID_WORK_DIMENSION -53 -#define CL_INVALID_WORK_GROUP_SIZE -54 -#define CL_INVALID_WORK_ITEM_SIZE -55 -#define CL_INVALID_GLOBAL_OFFSET -56 -#define CL_INVALID_EVENT_WAIT_LIST -57 -#define CL_INVALID_EVENT -58 -#define CL_INVALID_OPERATION -59 -#define CL_INVALID_GL_OBJECT -60 -#define CL_INVALID_BUFFER_SIZE -61 -#define CL_INVALID_MIP_LEVEL -62 -#define CL_INVALID_GLOBAL_WORK_SIZE -63 -#ifdef CL_VERSION_1_1 -#define CL_INVALID_PROPERTY -64 -#endif -#ifdef CL_VERSION_1_2 -#define CL_INVALID_IMAGE_DESCRIPTOR -65 -#define CL_INVALID_COMPILER_OPTIONS -66 -#define CL_INVALID_LINKER_OPTIONS -67 -#define CL_INVALID_DEVICE_PARTITION_COUNT -68 -#endif -#ifdef CL_VERSION_2_0 -#define CL_INVALID_PIPE_SIZE -69 -#define CL_INVALID_DEVICE_QUEUE -70 -#endif -#ifdef CL_VERSION_2_2 -#define CL_INVALID_SPEC_ID -71 -#define CL_MAX_SIZE_RESTRICTION_EXCEEDED -72 -#endif - - -/* cl_bool */ -#define CL_FALSE 0 -#define CL_TRUE 1 -#ifdef CL_VERSION_1_2 -#define CL_BLOCKING CL_TRUE -#define CL_NON_BLOCKING CL_FALSE -#endif - -/* cl_platform_info */ -#define CL_PLATFORM_PROFILE 0x0900 -#define CL_PLATFORM_VERSION 0x0901 -#define CL_PLATFORM_NAME 0x0902 -#define CL_PLATFORM_VENDOR 0x0903 -#define CL_PLATFORM_EXTENSIONS 0x0904 -#ifdef CL_VERSION_2_1 -#define CL_PLATFORM_HOST_TIMER_RESOLUTION 0x0905 -#endif -#ifdef CL_VERSION_3_0 -#define CL_PLATFORM_NUMERIC_VERSION 0x0906 -#define CL_PLATFORM_EXTENSIONS_WITH_VERSION 0x0907 -#endif - -/* cl_device_type - bitfield */ -#define CL_DEVICE_TYPE_DEFAULT (1 << 0) -#define CL_DEVICE_TYPE_CPU (1 << 1) -#define CL_DEVICE_TYPE_GPU (1 << 2) -#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) -#ifdef CL_VERSION_1_2 -#define CL_DEVICE_TYPE_CUSTOM (1 << 4) -#endif -#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF - -/* cl_device_info */ -#define CL_DEVICE_TYPE 0x1000 -#define CL_DEVICE_VENDOR_ID 0x1001 
-#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 -#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 -#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 -#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B -#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C -#define CL_DEVICE_ADDRESS_BITS 0x100D -#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E -#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F -#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 -#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 -#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 -#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 -#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 -#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 -#define CL_DEVICE_IMAGE_SUPPORT 0x1016 -#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 -#define CL_DEVICE_MAX_SAMPLERS 0x1018 -#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 -#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A -#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B -#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C -#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D -#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E -#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F -#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 -#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 -#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 -#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 -#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 -#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 -#define CL_DEVICE_ENDIAN_LITTLE 0x1026 -#define CL_DEVICE_AVAILABLE 0x1027 -#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 -#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 -#define CL_DEVICE_QUEUE_PROPERTIES 0x102A /* deprecated */ -#ifdef CL_VERSION_2_0 -#define CL_DEVICE_QUEUE_ON_HOST_PROPERTIES 0x102A 
-#endif -#define CL_DEVICE_NAME 0x102B -#define CL_DEVICE_VENDOR 0x102C -#define CL_DRIVER_VERSION 0x102D -#define CL_DEVICE_PROFILE 0x102E -#define CL_DEVICE_VERSION 0x102F -#define CL_DEVICE_EXTENSIONS 0x1030 -#define CL_DEVICE_PLATFORM 0x1031 -#ifdef CL_VERSION_1_2 -#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 -#endif -/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG which is already defined in "cl_ext.h" */ -#ifdef CL_VERSION_1_1 -#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 -#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 /* deprecated */ -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B -#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C -#define CL_DEVICE_OPENCL_C_VERSION 0x103D -#endif -#ifdef CL_VERSION_1_2 -#define CL_DEVICE_LINKER_AVAILABLE 0x103E -#define CL_DEVICE_BUILT_IN_KERNELS 0x103F -#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 -#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 -#define CL_DEVICE_PARENT_DEVICE 0x1042 -#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 -#define CL_DEVICE_PARTITION_PROPERTIES 0x1044 -#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 -#define CL_DEVICE_PARTITION_TYPE 0x1046 -#define CL_DEVICE_REFERENCE_COUNT 0x1047 -#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 -#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 -#endif -#ifdef CL_VERSION_2_0 -#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A -#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B -#define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS 0x104C -#define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE 0x104D -#define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES 0x104E -#define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE 0x104F -#define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE 0x1050 -#define CL_DEVICE_MAX_ON_DEVICE_QUEUES 0x1051 -#define 
CL_DEVICE_MAX_ON_DEVICE_EVENTS 0x1052 -#define CL_DEVICE_SVM_CAPABILITIES 0x1053 -#define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE 0x1054 -#define CL_DEVICE_MAX_PIPE_ARGS 0x1055 -#define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS 0x1056 -#define CL_DEVICE_PIPE_MAX_PACKET_SIZE 0x1057 -#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT 0x1058 -#define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT 0x1059 -#define CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT 0x105A -#endif -#ifdef CL_VERSION_2_1 -#define CL_DEVICE_IL_VERSION 0x105B -#define CL_DEVICE_MAX_NUM_SUB_GROUPS 0x105C -#define CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS 0x105D -#endif -#ifdef CL_VERSION_3_0 -#define CL_DEVICE_NUMERIC_VERSION 0x105E -#define CL_DEVICE_EXTENSIONS_WITH_VERSION 0x1060 -#define CL_DEVICE_ILS_WITH_VERSION 0x1061 -#define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION 0x1062 -#define CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES 0x1063 -#define CL_DEVICE_ATOMIC_FENCE_CAPABILITIES 0x1064 -#define CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT 0x1065 -#define CL_DEVICE_OPENCL_C_ALL_VERSIONS 0x1066 -#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x1067 -#define CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT 0x1068 -#define CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT 0x1069 -/* 0x106A to 0x106E - Reserved for upcoming KHR extension */ -#define CL_DEVICE_OPENCL_C_FEATURES 0x106F -#define CL_DEVICE_DEVICE_ENQUEUE_SUPPORT 0x1070 -#define CL_DEVICE_PIPE_SUPPORT 0x1071 -#endif - -/* cl_device_fp_config - bitfield */ -#define CL_FP_DENORM (1 << 0) -#define CL_FP_INF_NAN (1 << 1) -#define CL_FP_ROUND_TO_NEAREST (1 << 2) -#define CL_FP_ROUND_TO_ZERO (1 << 3) -#define CL_FP_ROUND_TO_INF (1 << 4) -#define CL_FP_FMA (1 << 5) -#ifdef CL_VERSION_1_1 -#define CL_FP_SOFT_FLOAT (1 << 6) -#endif -#ifdef CL_VERSION_1_2 -#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) -#endif - -/* cl_device_mem_cache_type */ -#define CL_NONE 0x0 -#define CL_READ_ONLY_CACHE 0x1 -#define CL_READ_WRITE_CACHE 0x2 - -/* 
cl_device_local_mem_type */ -#define CL_LOCAL 0x1 -#define CL_GLOBAL 0x2 - -/* cl_device_exec_capabilities - bitfield */ -#define CL_EXEC_KERNEL (1 << 0) -#define CL_EXEC_NATIVE_KERNEL (1 << 1) - -/* cl_command_queue_properties - bitfield */ -#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) -#define CL_QUEUE_PROFILING_ENABLE (1 << 1) -#ifdef CL_VERSION_2_0 -#define CL_QUEUE_ON_DEVICE (1 << 2) -#define CL_QUEUE_ON_DEVICE_DEFAULT (1 << 3) -#endif - -/* cl_context_info */ -#define CL_CONTEXT_REFERENCE_COUNT 0x1080 -#define CL_CONTEXT_DEVICES 0x1081 -#define CL_CONTEXT_PROPERTIES 0x1082 -#ifdef CL_VERSION_1_1 -#define CL_CONTEXT_NUM_DEVICES 0x1083 -#endif - -/* cl_context_properties */ -#define CL_CONTEXT_PLATFORM 0x1084 -#ifdef CL_VERSION_1_2 -#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 -#endif - -#ifdef CL_VERSION_1_2 - -/* cl_device_partition_property */ -#define CL_DEVICE_PARTITION_EQUALLY 0x1086 -#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 -#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 -#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 - -#endif - -#ifdef CL_VERSION_1_2 - -/* cl_device_affinity_domain */ -#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) -#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) -#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) -#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) -#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) -#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) - -#endif - -#ifdef CL_VERSION_2_0 - -/* cl_device_svm_capabilities */ -#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0) -#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1) -#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2) -#define CL_DEVICE_SVM_ATOMICS (1 << 3) - -#endif - -/* cl_command_queue_info */ -#define CL_QUEUE_CONTEXT 0x1090 -#define CL_QUEUE_DEVICE 0x1091 -#define CL_QUEUE_REFERENCE_COUNT 0x1092 -#define CL_QUEUE_PROPERTIES 0x1093 -#ifdef CL_VERSION_2_0 -#define CL_QUEUE_SIZE 0x1094 -#endif -#ifdef CL_VERSION_2_1 
-#define CL_QUEUE_DEVICE_DEFAULT 0x1095 -#endif -#ifdef CL_VERSION_3_0 -#define CL_QUEUE_PROPERTIES_ARRAY 0x1098 -#endif - -/* cl_mem_flags and cl_svm_mem_flags - bitfield */ -#define CL_MEM_READ_WRITE (1 << 0) -#define CL_MEM_WRITE_ONLY (1 << 1) -#define CL_MEM_READ_ONLY (1 << 2) -#define CL_MEM_USE_HOST_PTR (1 << 3) -#define CL_MEM_ALLOC_HOST_PTR (1 << 4) -#define CL_MEM_COPY_HOST_PTR (1 << 5) -/* reserved (1 << 6) */ -#ifdef CL_VERSION_1_2 -#define CL_MEM_HOST_WRITE_ONLY (1 << 7) -#define CL_MEM_HOST_READ_ONLY (1 << 8) -#define CL_MEM_HOST_NO_ACCESS (1 << 9) -#endif -#ifdef CL_VERSION_2_0 -#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10) /* used by cl_svm_mem_flags only */ -#define CL_MEM_SVM_ATOMICS (1 << 11) /* used by cl_svm_mem_flags only */ -#define CL_MEM_KERNEL_READ_AND_WRITE (1 << 12) -#endif - -#ifdef CL_VERSION_1_2 - -/* cl_mem_migration_flags - bitfield */ -#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) -#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) - -#endif - -/* cl_channel_order */ -#define CL_R 0x10B0 -#define CL_A 0x10B1 -#define CL_RG 0x10B2 -#define CL_RA 0x10B3 -#define CL_RGB 0x10B4 -#define CL_RGBA 0x10B5 -#define CL_BGRA 0x10B6 -#define CL_ARGB 0x10B7 -#define CL_INTENSITY 0x10B8 -#define CL_LUMINANCE 0x10B9 -#ifdef CL_VERSION_1_1 -#define CL_Rx 0x10BA -#define CL_RGx 0x10BB -#define CL_RGBx 0x10BC -#endif -#ifdef CL_VERSION_1_2 -#define CL_DEPTH 0x10BD -#define CL_DEPTH_STENCIL 0x10BE -#endif -#ifdef CL_VERSION_2_0 -#define CL_sRGB 0x10BF -#define CL_sRGBx 0x10C0 -#define CL_sRGBA 0x10C1 -#define CL_sBGRA 0x10C2 -#define CL_ABGR 0x10C3 -#endif - -/* cl_channel_type */ -#define CL_SNORM_INT8 0x10D0 -#define CL_SNORM_INT16 0x10D1 -#define CL_UNORM_INT8 0x10D2 -#define CL_UNORM_INT16 0x10D3 -#define CL_UNORM_SHORT_565 0x10D4 -#define CL_UNORM_SHORT_555 0x10D5 -#define CL_UNORM_INT_101010 0x10D6 -#define CL_SIGNED_INT8 0x10D7 -#define CL_SIGNED_INT16 0x10D8 -#define CL_SIGNED_INT32 0x10D9 -#define CL_UNSIGNED_INT8 0x10DA -#define 
CL_UNSIGNED_INT16 0x10DB -#define CL_UNSIGNED_INT32 0x10DC -#define CL_HALF_FLOAT 0x10DD -#define CL_FLOAT 0x10DE -#ifdef CL_VERSION_1_2 -#define CL_UNORM_INT24 0x10DF -#endif -#ifdef CL_VERSION_2_1 -#define CL_UNORM_INT_101010_2 0x10E0 -#endif - -/* cl_mem_object_type */ -#define CL_MEM_OBJECT_BUFFER 0x10F0 -#define CL_MEM_OBJECT_IMAGE2D 0x10F1 -#define CL_MEM_OBJECT_IMAGE3D 0x10F2 -#ifdef CL_VERSION_1_2 -#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 -#define CL_MEM_OBJECT_IMAGE1D 0x10F4 -#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 -#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 -#endif -#ifdef CL_VERSION_2_0 -#define CL_MEM_OBJECT_PIPE 0x10F7 -#endif - -/* cl_mem_info */ -#define CL_MEM_TYPE 0x1100 -#define CL_MEM_FLAGS 0x1101 -#define CL_MEM_SIZE 0x1102 -#define CL_MEM_HOST_PTR 0x1103 -#define CL_MEM_MAP_COUNT 0x1104 -#define CL_MEM_REFERENCE_COUNT 0x1105 -#define CL_MEM_CONTEXT 0x1106 -#ifdef CL_VERSION_1_1 -#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 -#define CL_MEM_OFFSET 0x1108 -#endif -#ifdef CL_VERSION_2_0 -#define CL_MEM_USES_SVM_POINTER 0x1109 -#endif -#ifdef CL_VERSION_3_0 -#define CL_MEM_PROPERTIES 0x110A -#endif - -/* cl_image_info */ -#define CL_IMAGE_FORMAT 0x1110 -#define CL_IMAGE_ELEMENT_SIZE 0x1111 -#define CL_IMAGE_ROW_PITCH 0x1112 -#define CL_IMAGE_SLICE_PITCH 0x1113 -#define CL_IMAGE_WIDTH 0x1114 -#define CL_IMAGE_HEIGHT 0x1115 -#define CL_IMAGE_DEPTH 0x1116 -#ifdef CL_VERSION_1_2 -#define CL_IMAGE_ARRAY_SIZE 0x1117 -#define CL_IMAGE_BUFFER 0x1118 -#define CL_IMAGE_NUM_MIP_LEVELS 0x1119 -#define CL_IMAGE_NUM_SAMPLES 0x111A -#endif - - -/* cl_pipe_info */ -#ifdef CL_VERSION_2_0 -#define CL_PIPE_PACKET_SIZE 0x1120 -#define CL_PIPE_MAX_PACKETS 0x1121 -#endif -#ifdef CL_VERSION_3_0 -#define CL_PIPE_PROPERTIES 0x1122 -#endif - -/* cl_addressing_mode */ -#define CL_ADDRESS_NONE 0x1130 -#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 -#define CL_ADDRESS_CLAMP 0x1132 -#define CL_ADDRESS_REPEAT 0x1133 -#ifdef CL_VERSION_1_1 -#define CL_ADDRESS_MIRRORED_REPEAT 0x1134 
-#endif - -/* cl_filter_mode */ -#define CL_FILTER_NEAREST 0x1140 -#define CL_FILTER_LINEAR 0x1141 - -/* cl_sampler_info */ -#define CL_SAMPLER_REFERENCE_COUNT 0x1150 -#define CL_SAMPLER_CONTEXT 0x1151 -#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 -#define CL_SAMPLER_ADDRESSING_MODE 0x1153 -#define CL_SAMPLER_FILTER_MODE 0x1154 -#ifdef CL_VERSION_2_0 -/* These enumerants are for the cl_khr_mipmap_image extension. - They have since been added to cl_ext.h with an appropriate - KHR suffix, but are left here for backwards compatibility. */ -#define CL_SAMPLER_MIP_FILTER_MODE 0x1155 -#define CL_SAMPLER_LOD_MIN 0x1156 -#define CL_SAMPLER_LOD_MAX 0x1157 -#endif -#ifdef CL_VERSION_3_0 -#define CL_SAMPLER_PROPERTIES 0x1158 -#endif - -/* cl_map_flags - bitfield */ -#define CL_MAP_READ (1 << 0) -#define CL_MAP_WRITE (1 << 1) -#ifdef CL_VERSION_1_2 -#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) -#endif - -/* cl_program_info */ -#define CL_PROGRAM_REFERENCE_COUNT 0x1160 -#define CL_PROGRAM_CONTEXT 0x1161 -#define CL_PROGRAM_NUM_DEVICES 0x1162 -#define CL_PROGRAM_DEVICES 0x1163 -#define CL_PROGRAM_SOURCE 0x1164 -#define CL_PROGRAM_BINARY_SIZES 0x1165 -#define CL_PROGRAM_BINARIES 0x1166 -#ifdef CL_VERSION_1_2 -#define CL_PROGRAM_NUM_KERNELS 0x1167 -#define CL_PROGRAM_KERNEL_NAMES 0x1168 -#endif -#ifdef CL_VERSION_2_1 -#define CL_PROGRAM_IL 0x1169 -#endif -#ifdef CL_VERSION_2_2 -#define CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT 0x116A -#define CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT 0x116B -#endif - -/* cl_program_build_info */ -#define CL_PROGRAM_BUILD_STATUS 0x1181 -#define CL_PROGRAM_BUILD_OPTIONS 0x1182 -#define CL_PROGRAM_BUILD_LOG 0x1183 -#ifdef CL_VERSION_1_2 -#define CL_PROGRAM_BINARY_TYPE 0x1184 -#endif -#ifdef CL_VERSION_2_0 -#define CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE 0x1185 -#endif - -#ifdef CL_VERSION_1_2 - -/* cl_program_binary_type */ -#define CL_PROGRAM_BINARY_TYPE_NONE 0x0 -#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 -#define 
CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 -#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 - -#endif - -/* cl_build_status */ -#define CL_BUILD_SUCCESS 0 -#define CL_BUILD_NONE -1 -#define CL_BUILD_ERROR -2 -#define CL_BUILD_IN_PROGRESS -3 - -/* cl_kernel_info */ -#define CL_KERNEL_FUNCTION_NAME 0x1190 -#define CL_KERNEL_NUM_ARGS 0x1191 -#define CL_KERNEL_REFERENCE_COUNT 0x1192 -#define CL_KERNEL_CONTEXT 0x1193 -#define CL_KERNEL_PROGRAM 0x1194 -#ifdef CL_VERSION_1_2 -#define CL_KERNEL_ATTRIBUTES 0x1195 -#endif - -#ifdef CL_VERSION_1_2 - -/* cl_kernel_arg_info */ -#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 -#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 -#define CL_KERNEL_ARG_TYPE_NAME 0x1198 -#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 -#define CL_KERNEL_ARG_NAME 0x119A - -#endif - -#ifdef CL_VERSION_1_2 - -/* cl_kernel_arg_address_qualifier */ -#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B -#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C -#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D -#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E - -#endif - -#ifdef CL_VERSION_1_2 - -/* cl_kernel_arg_access_qualifier */ -#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 -#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 -#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 -#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 - -#endif - -#ifdef CL_VERSION_1_2 - -/* cl_kernel_arg_type_qualifier */ -#define CL_KERNEL_ARG_TYPE_NONE 0 -#define CL_KERNEL_ARG_TYPE_CONST (1 << 0) -#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) -#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) -#ifdef CL_VERSION_2_0 -#define CL_KERNEL_ARG_TYPE_PIPE (1 << 3) -#endif - -#endif - -/* cl_kernel_work_group_info */ -#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 -#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 -#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 -#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 -#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 -#ifdef CL_VERSION_1_2 -#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 -#endif - -#ifdef CL_VERSION_2_1 - -/* 
cl_kernel_sub_group_info */ -#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE 0x2033 -#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE 0x2034 -#define CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT 0x11B8 -#define CL_KERNEL_MAX_NUM_SUB_GROUPS 0x11B9 -#define CL_KERNEL_COMPILE_NUM_SUB_GROUPS 0x11BA - -#endif - -#ifdef CL_VERSION_2_0 - -/* cl_kernel_exec_info */ -#define CL_KERNEL_EXEC_INFO_SVM_PTRS 0x11B6 -#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM 0x11B7 - -#endif - -/* cl_event_info */ -#define CL_EVENT_COMMAND_QUEUE 0x11D0 -#define CL_EVENT_COMMAND_TYPE 0x11D1 -#define CL_EVENT_REFERENCE_COUNT 0x11D2 -#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 -#ifdef CL_VERSION_1_1 -#define CL_EVENT_CONTEXT 0x11D4 -#endif - -/* cl_command_type */ -#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 -#define CL_COMMAND_TASK 0x11F1 -#define CL_COMMAND_NATIVE_KERNEL 0x11F2 -#define CL_COMMAND_READ_BUFFER 0x11F3 -#define CL_COMMAND_WRITE_BUFFER 0x11F4 -#define CL_COMMAND_COPY_BUFFER 0x11F5 -#define CL_COMMAND_READ_IMAGE 0x11F6 -#define CL_COMMAND_WRITE_IMAGE 0x11F7 -#define CL_COMMAND_COPY_IMAGE 0x11F8 -#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 -#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA -#define CL_COMMAND_MAP_BUFFER 0x11FB -#define CL_COMMAND_MAP_IMAGE 0x11FC -#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD -#define CL_COMMAND_MARKER 0x11FE -#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF -#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 -#ifdef CL_VERSION_1_1 -#define CL_COMMAND_READ_BUFFER_RECT 0x1201 -#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 -#define CL_COMMAND_COPY_BUFFER_RECT 0x1203 -#define CL_COMMAND_USER 0x1204 -#endif -#ifdef CL_VERSION_1_2 -#define CL_COMMAND_BARRIER 0x1205 -#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 -#define CL_COMMAND_FILL_BUFFER 0x1207 -#define CL_COMMAND_FILL_IMAGE 0x1208 -#endif -#ifdef CL_VERSION_2_0 -#define CL_COMMAND_SVM_FREE 0x1209 -#define CL_COMMAND_SVM_MEMCPY 0x120A -#define CL_COMMAND_SVM_MEMFILL 0x120B -#define CL_COMMAND_SVM_MAP 0x120C 
-#define CL_COMMAND_SVM_UNMAP 0x120D -#endif -#ifdef CL_VERSION_3_0 -#define CL_COMMAND_SVM_MIGRATE_MEM 0x120E -#endif - -/* command execution status */ -#define CL_COMPLETE 0x0 -#define CL_RUNNING 0x1 -#define CL_SUBMITTED 0x2 -#define CL_QUEUED 0x3 - -/* cl_buffer_create_type */ -#ifdef CL_VERSION_1_1 -#define CL_BUFFER_CREATE_TYPE_REGION 0x1220 -#endif - -/* cl_profiling_info */ -#define CL_PROFILING_COMMAND_QUEUED 0x1280 -#define CL_PROFILING_COMMAND_SUBMIT 0x1281 -#define CL_PROFILING_COMMAND_START 0x1282 -#define CL_PROFILING_COMMAND_END 0x1283 -#ifdef CL_VERSION_2_0 -#define CL_PROFILING_COMMAND_COMPLETE 0x1284 -#endif - -/* cl_device_atomic_capabilities - bitfield */ -#ifdef CL_VERSION_3_0 -#define CL_DEVICE_ATOMIC_ORDER_RELAXED (1 << 0) -#define CL_DEVICE_ATOMIC_ORDER_ACQ_REL (1 << 1) -#define CL_DEVICE_ATOMIC_ORDER_SEQ_CST (1 << 2) -#define CL_DEVICE_ATOMIC_SCOPE_WORK_ITEM (1 << 3) -#define CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP (1 << 4) -#define CL_DEVICE_ATOMIC_SCOPE_DEVICE (1 << 5) -#define CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES (1 << 6) -#endif - -/* cl_khronos_vendor_id */ -#define CL_KHRONOS_VENDOR_ID_CODEPLAY 0x10004 - -#ifdef CL_VERSION_3_0 - -/* cl_version */ -#define CL_VERSION_MAJOR_BITS (10) -#define CL_VERSION_MINOR_BITS (10) -#define CL_VERSION_PATCH_BITS (12) - -#define CL_VERSION_MAJOR_MASK ((1 << CL_VERSION_MAJOR_BITS) - 1) -#define CL_VERSION_MINOR_MASK ((1 << CL_VERSION_MINOR_BITS) - 1) -#define CL_VERSION_PATCH_MASK ((1 << CL_VERSION_PATCH_BITS) - 1) - -#define CL_VERSION_MAJOR(version) \ - ((version) >> (CL_VERSION_MINOR_BITS + CL_VERSION_PATCH_BITS)) - -#define CL_VERSION_MINOR(version) \ - (((version) >> CL_VERSION_PATCH_BITS) & CL_VERSION_MINOR_MASK) - -#define CL_VERSION_PATCH(version) ((version) & CL_VERSION_PATCH_MASK) - -#define CL_MAKE_VERSION(major, minor, patch) \ - ((((major) & CL_VERSION_MAJOR_MASK) \ - << (CL_VERSION_MINOR_BITS + CL_VERSION_PATCH_BITS)) | \ - (((minor) & CL_VERSION_MINOR_MASK) << CL_VERSION_PATCH_BITS) | \ - 
((patch) & CL_VERSION_PATCH_MASK)) - -#endif - -/********************************************************************************************************/ - -/* Platform API */ -extern CL_API_ENTRY cl_int CL_API_CALL -clGetPlatformIDs(cl_uint num_entries, - cl_platform_id * platforms, - cl_uint * num_platforms) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetPlatformInfo(cl_platform_id platform, - cl_platform_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -/* Device APIs */ -extern CL_API_ENTRY cl_int CL_API_CALL -clGetDeviceIDs(cl_platform_id platform, - cl_device_type device_type, - cl_uint num_entries, - cl_device_id * devices, - cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetDeviceInfo(cl_device_id device, - cl_device_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -extern CL_API_ENTRY cl_int CL_API_CALL -clCreateSubDevices(cl_device_id in_device, - const cl_device_partition_property * properties, - cl_uint num_devices, - cl_device_id * out_devices, - cl_uint * num_devices_ret) CL_API_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clRetainDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clReleaseDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2; - -#endif - -#ifdef CL_VERSION_2_1 - -extern CL_API_ENTRY cl_int CL_API_CALL -clSetDefaultDeviceCommandQueue(cl_context context, - cl_device_id device, - cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetDeviceAndHostTimer(cl_device_id device, - cl_ulong* device_timestamp, - cl_ulong* host_timestamp) CL_API_SUFFIX__VERSION_2_1; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetHostTimer(cl_device_id device, - cl_ulong * 
host_timestamp) CL_API_SUFFIX__VERSION_2_1; - -#endif - -/* Context APIs */ -extern CL_API_ENTRY cl_context CL_API_CALL -clCreateContext(const cl_context_properties * properties, - cl_uint num_devices, - const cl_device_id * devices, - void (CL_CALLBACK * pfn_notify)(const char * errinfo, - const void * private_info, - size_t cb, - void * user_data), - void * user_data, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_context CL_API_CALL -clCreateContextFromType(const cl_context_properties * properties, - cl_device_type device_type, - void (CL_CALLBACK * pfn_notify)(const char * errinfo, - const void * private_info, - size_t cb, - void * user_data), - void * user_data, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clRetainContext(cl_context context) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clReleaseContext(cl_context context) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetContextInfo(cl_context context, - cl_context_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -/* Command Queue APIs */ - -#ifdef CL_VERSION_2_0 - -extern CL_API_ENTRY cl_command_queue CL_API_CALL -clCreateCommandQueueWithProperties(cl_context context, - cl_device_id device, - const cl_queue_properties * properties, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; - -#endif - -extern CL_API_ENTRY cl_int CL_API_CALL -clRetainCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clReleaseCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetCommandQueueInfo(cl_command_queue command_queue, - cl_command_queue_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -/* Memory Object APIs 
*/ -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateBuffer(cl_context context, - cl_mem_flags flags, - size_t size, - void * host_ptr, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_1 - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateSubBuffer(cl_mem buffer, - cl_mem_flags flags, - cl_buffer_create_type buffer_create_type, - const void * buffer_create_info, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; - -#endif - -#ifdef CL_VERSION_1_2 - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateImage(cl_context context, - cl_mem_flags flags, - const cl_image_format * image_format, - const cl_image_desc * image_desc, - void * host_ptr, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -#endif - -#ifdef CL_VERSION_2_0 - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreatePipe(cl_context context, - cl_mem_flags flags, - cl_uint pipe_packet_size, - cl_uint pipe_max_packets, - const cl_pipe_properties * properties, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; - -#endif - -#ifdef CL_VERSION_3_0 - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateBufferWithProperties(cl_context context, - const cl_mem_properties * properties, - cl_mem_flags flags, - size_t size, - void * host_ptr, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_3_0; - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateImageWithProperties(cl_context context, - const cl_mem_properties * properties, - cl_mem_flags flags, - const cl_image_format * image_format, - const cl_image_desc * image_desc, - void * host_ptr, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_3_0; - -#endif - -extern CL_API_ENTRY cl_int CL_API_CALL -clRetainMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clReleaseMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetSupportedImageFormats(cl_context context, - cl_mem_flags flags, - cl_mem_object_type image_type, - cl_uint num_entries, - cl_image_format * 
image_formats, - cl_uint * num_image_formats) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetMemObjectInfo(cl_mem memobj, - cl_mem_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetImageInfo(cl_mem image, - cl_image_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_2_0 - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetPipeInfo(cl_mem pipe, - cl_pipe_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_2_0; - -#endif - -#ifdef CL_VERSION_1_1 - -extern CL_API_ENTRY cl_int CL_API_CALL -clSetMemObjectDestructorCallback(cl_mem memobj, - void (CL_CALLBACK * pfn_notify)(cl_mem memobj, - void * user_data), - void * user_data) CL_API_SUFFIX__VERSION_1_1; - -#endif - -/* SVM Allocation APIs */ - -#ifdef CL_VERSION_2_0 - -extern CL_API_ENTRY void * CL_API_CALL -clSVMAlloc(cl_context context, - cl_svm_mem_flags flags, - size_t size, - cl_uint alignment) CL_API_SUFFIX__VERSION_2_0; - -extern CL_API_ENTRY void CL_API_CALL -clSVMFree(cl_context context, - void * svm_pointer) CL_API_SUFFIX__VERSION_2_0; - -#endif - -/* Sampler APIs */ - -#ifdef CL_VERSION_2_0 - -extern CL_API_ENTRY cl_sampler CL_API_CALL -clCreateSamplerWithProperties(cl_context context, - const cl_sampler_properties * sampler_properties, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; - -#endif - -extern CL_API_ENTRY cl_int CL_API_CALL -clRetainSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clReleaseSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetSamplerInfo(cl_sampler sampler, - cl_sampler_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) 
CL_API_SUFFIX__VERSION_1_0; - -/* Program Object APIs */ -extern CL_API_ENTRY cl_program CL_API_CALL -clCreateProgramWithSource(cl_context context, - cl_uint count, - const char ** strings, - const size_t * lengths, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_program CL_API_CALL -clCreateProgramWithBinary(cl_context context, - cl_uint num_devices, - const cl_device_id * device_list, - const size_t * lengths, - const unsigned char ** binaries, - cl_int * binary_status, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -extern CL_API_ENTRY cl_program CL_API_CALL -clCreateProgramWithBuiltInKernels(cl_context context, - cl_uint num_devices, - const cl_device_id * device_list, - const char * kernel_names, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -#endif - -#ifdef CL_VERSION_2_1 - -extern CL_API_ENTRY cl_program CL_API_CALL -clCreateProgramWithIL(cl_context context, - const void* il, - size_t length, - cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1; - -#endif - -extern CL_API_ENTRY cl_int CL_API_CALL -clRetainProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clReleaseProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clBuildProgram(cl_program program, - cl_uint num_devices, - const cl_device_id * device_list, - const char * options, - void (CL_CALLBACK * pfn_notify)(cl_program program, - void * user_data), - void * user_data) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -extern CL_API_ENTRY cl_int CL_API_CALL -clCompileProgram(cl_program program, - cl_uint num_devices, - const cl_device_id * device_list, - const char * options, - cl_uint num_input_headers, - const cl_program * input_headers, - const char ** header_include_names, - void (CL_CALLBACK * pfn_notify)(cl_program program, - void * user_data), - void * user_data) CL_API_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_program 
CL_API_CALL -clLinkProgram(cl_context context, - cl_uint num_devices, - const cl_device_id * device_list, - const char * options, - cl_uint num_input_programs, - const cl_program * input_programs, - void (CL_CALLBACK * pfn_notify)(cl_program program, - void * user_data), - void * user_data, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -#endif - -#ifdef CL_VERSION_2_2 - -extern CL_API_ENTRY cl_int CL_API_CALL -clSetProgramReleaseCallback(cl_program program, - void (CL_CALLBACK * pfn_notify)(cl_program program, - void * user_data), - void * user_data) CL_API_SUFFIX__VERSION_2_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clSetProgramSpecializationConstant(cl_program program, - cl_uint spec_id, - size_t spec_size, - const void* spec_value) CL_API_SUFFIX__VERSION_2_2; - -#endif - -#ifdef CL_VERSION_1_2 - -extern CL_API_ENTRY cl_int CL_API_CALL -clUnloadPlatformCompiler(cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; - -#endif - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetProgramInfo(cl_program program, - cl_program_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetProgramBuildInfo(cl_program program, - cl_device_id device, - cl_program_build_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -/* Kernel Object APIs */ -extern CL_API_ENTRY cl_kernel CL_API_CALL -clCreateKernel(cl_program program, - const char * kernel_name, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clCreateKernelsInProgram(cl_program program, - cl_uint num_kernels, - cl_kernel * kernels, - cl_uint * num_kernels_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_2_1 - -extern CL_API_ENTRY cl_kernel CL_API_CALL -clCloneKernel(cl_kernel source_kernel, - cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1; - -#endif - -extern CL_API_ENTRY 
cl_int CL_API_CALL -clRetainKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clReleaseKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clSetKernelArg(cl_kernel kernel, - cl_uint arg_index, - size_t arg_size, - const void * arg_value) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_2_0 - -extern CL_API_ENTRY cl_int CL_API_CALL -clSetKernelArgSVMPointer(cl_kernel kernel, - cl_uint arg_index, - const void * arg_value) CL_API_SUFFIX__VERSION_2_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clSetKernelExecInfo(cl_kernel kernel, - cl_kernel_exec_info param_name, - size_t param_value_size, - const void * param_value) CL_API_SUFFIX__VERSION_2_0; - -#endif - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetKernelInfo(cl_kernel kernel, - cl_kernel_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetKernelArgInfo(cl_kernel kernel, - cl_uint arg_indx, - cl_kernel_arg_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; - -#endif - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetKernelWorkGroupInfo(cl_kernel kernel, - cl_device_id device, - cl_kernel_work_group_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_2_1 - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetKernelSubGroupInfo(cl_kernel kernel, - cl_device_id device, - cl_kernel_sub_group_info param_name, - size_t input_value_size, - const void* input_value, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_2_1; - -#endif - -/* Event Object APIs */ -extern CL_API_ENTRY cl_int CL_API_CALL -clWaitForEvents(cl_uint num_events, - const cl_event * event_list) 
CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetEventInfo(cl_event event, - cl_event_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_1 - -extern CL_API_ENTRY cl_event CL_API_CALL -clCreateUserEvent(cl_context context, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; - -#endif - -extern CL_API_ENTRY cl_int CL_API_CALL -clRetainEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clReleaseEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_1 - -extern CL_API_ENTRY cl_int CL_API_CALL -clSetUserEventStatus(cl_event event, - cl_int execution_status) CL_API_SUFFIX__VERSION_1_1; - -extern CL_API_ENTRY cl_int CL_API_CALL -clSetEventCallback(cl_event event, - cl_int command_exec_callback_type, - void (CL_CALLBACK * pfn_notify)(cl_event event, - cl_int event_command_status, - void * user_data), - void * user_data) CL_API_SUFFIX__VERSION_1_1; - -#endif - -/* Profiling APIs */ -extern CL_API_ENTRY cl_int CL_API_CALL -clGetEventProfilingInfo(cl_event event, - cl_profiling_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -/* Flush and Finish APIs */ -extern CL_API_ENTRY cl_int CL_API_CALL -clFlush(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clFinish(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; - -/* Enqueued Commands APIs */ -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueReadBuffer(cl_command_queue command_queue, - cl_mem buffer, - cl_bool blocking_read, - size_t offset, - size_t size, - void * ptr, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_1 - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueReadBufferRect(cl_command_queue 
command_queue, - cl_mem buffer, - cl_bool blocking_read, - const size_t * buffer_offset, - const size_t * host_offset, - const size_t * region, - size_t buffer_row_pitch, - size_t buffer_slice_pitch, - size_t host_row_pitch, - size_t host_slice_pitch, - void * ptr, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_1; - -#endif - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueWriteBuffer(cl_command_queue command_queue, - cl_mem buffer, - cl_bool blocking_write, - size_t offset, - size_t size, - const void * ptr, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_1 - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueWriteBufferRect(cl_command_queue command_queue, - cl_mem buffer, - cl_bool blocking_write, - const size_t * buffer_offset, - const size_t * host_offset, - const size_t * region, - size_t buffer_row_pitch, - size_t buffer_slice_pitch, - size_t host_row_pitch, - size_t host_slice_pitch, - const void * ptr, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_1; - -#endif - -#ifdef CL_VERSION_1_2 - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueFillBuffer(cl_command_queue command_queue, - cl_mem buffer, - const void * pattern, - size_t pattern_size, - size_t offset, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_2; - -#endif - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueCopyBuffer(cl_command_queue command_queue, - cl_mem src_buffer, - cl_mem dst_buffer, - size_t src_offset, - size_t dst_offset, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_1 - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueCopyBufferRect(cl_command_queue command_queue, 
- cl_mem src_buffer, - cl_mem dst_buffer, - const size_t * src_origin, - const size_t * dst_origin, - const size_t * region, - size_t src_row_pitch, - size_t src_slice_pitch, - size_t dst_row_pitch, - size_t dst_slice_pitch, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_1; - -#endif - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueReadImage(cl_command_queue command_queue, - cl_mem image, - cl_bool blocking_read, - const size_t * origin, - const size_t * region, - size_t row_pitch, - size_t slice_pitch, - void * ptr, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueWriteImage(cl_command_queue command_queue, - cl_mem image, - cl_bool blocking_write, - const size_t * origin, - const size_t * region, - size_t input_row_pitch, - size_t input_slice_pitch, - const void * ptr, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueFillImage(cl_command_queue command_queue, - cl_mem image, - const void * fill_color, - const size_t * origin, - const size_t * region, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_2; - -#endif - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueCopyImage(cl_command_queue command_queue, - cl_mem src_image, - cl_mem dst_image, - const size_t * src_origin, - const size_t * dst_origin, - const size_t * region, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueCopyImageToBuffer(cl_command_queue command_queue, - cl_mem src_image, - cl_mem dst_buffer, - const size_t * src_origin, - const size_t * region, - size_t dst_offset, - cl_uint 
num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueCopyBufferToImage(cl_command_queue command_queue, - cl_mem src_buffer, - cl_mem dst_image, - size_t src_offset, - const size_t * dst_origin, - const size_t * region, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY void * CL_API_CALL -clEnqueueMapBuffer(cl_command_queue command_queue, - cl_mem buffer, - cl_bool blocking_map, - cl_map_flags map_flags, - size_t offset, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY void * CL_API_CALL -clEnqueueMapImage(cl_command_queue command_queue, - cl_mem image, - cl_bool blocking_map, - cl_map_flags map_flags, - const size_t * origin, - const size_t * region, - size_t * image_row_pitch, - size_t * image_slice_pitch, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueUnmapMemObject(cl_command_queue command_queue, - cl_mem memobj, - void * mapped_ptr, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMigrateMemObjects(cl_command_queue command_queue, - cl_uint num_mem_objects, - const cl_mem * mem_objects, - cl_mem_migration_flags flags, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_2; - -#endif - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueNDRangeKernel(cl_command_queue command_queue, - cl_kernel kernel, - cl_uint work_dim, - const size_t * global_work_offset, - const size_t * 
global_work_size, - const size_t * local_work_size, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueNativeKernel(cl_command_queue command_queue, - void (CL_CALLBACK * user_func)(void *), - void * args, - size_t cb_args, - cl_uint num_mem_objects, - const cl_mem * mem_list, - const void ** args_mem_loc, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMarkerWithWaitList(cl_command_queue command_queue, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueBarrierWithWaitList(cl_command_queue command_queue, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_2; - -#endif - -#ifdef CL_VERSION_2_0 - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueSVMFree(cl_command_queue command_queue, - cl_uint num_svm_pointers, - void * svm_pointers[], - void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue, - cl_uint num_svm_pointers, - void * svm_pointers[], - void * user_data), - void * user_data, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_2_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueSVMMemcpy(cl_command_queue command_queue, - cl_bool blocking_copy, - void * dst_ptr, - const void * src_ptr, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_2_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueSVMMemFill(cl_command_queue command_queue, - void * svm_ptr, - const void * pattern, - size_t pattern_size, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event * 
event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_2_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueSVMMap(cl_command_queue command_queue, - cl_bool blocking_map, - cl_map_flags flags, - void * svm_ptr, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_2_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueSVMUnmap(cl_command_queue command_queue, - void * svm_ptr, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_2_0; - -#endif - -#ifdef CL_VERSION_2_1 - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueSVMMigrateMem(cl_command_queue command_queue, - cl_uint num_svm_pointers, - const void ** svm_pointers, - const size_t * sizes, - cl_mem_migration_flags flags, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_2_1; - -#endif - -#ifdef CL_VERSION_1_2 - -/* Extension function access - * - * Returns the extension function address for the given function name, - * or NULL if a valid function can not be found. The client must - * check to make sure the address is not NULL, before using or - * calling the returned function address. - */ -extern CL_API_ENTRY void * CL_API_CALL -clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, - const char * func_name) CL_API_SUFFIX__VERSION_1_2; - -#endif - -#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS - /* - * WARNING: - * This API introduces mutable state into the OpenCL implementation. It has been REMOVED - * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the - * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably. - * It is likely to be non-performant. Use of this API is not advised. Use at your own risk. 
- * - * Software developers previously relying on this API are instructed to set the command queue - * properties when creating the queue, instead. - */ - extern CL_API_ENTRY cl_int CL_API_CALL - clSetCommandQueueProperty(cl_command_queue command_queue, - cl_command_queue_properties properties, - cl_bool enable, - cl_command_queue_properties * old_properties) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; -#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ - -/* Deprecated OpenCL 1.1 APIs */ -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL -clCreateImage2D(cl_context context, - cl_mem_flags flags, - const cl_image_format * image_format, - size_t image_width, - size_t image_height, - size_t image_row_pitch, - void * host_ptr, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL -clCreateImage3D(cl_context context, - cl_mem_flags flags, - const cl_image_format * image_format, - size_t image_width, - size_t image_height, - size_t image_depth, - size_t image_row_pitch, - size_t image_slice_pitch, - void * host_ptr, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL -clEnqueueMarker(cl_command_queue command_queue, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL -clEnqueueWaitForEvents(cl_command_queue command_queue, - cl_uint num_events, - const cl_event * event_list) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL -clEnqueueBarrier(cl_command_queue command_queue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL -clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * 
CL_API_CALL -clGetExtensionFunctionAddress(const char * func_name) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -/* Deprecated OpenCL 2.0 APIs */ -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL -clCreateCommandQueue(cl_context context, - cl_device_id device, - cl_command_queue_properties properties, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; - -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL -clCreateSampler(cl_context context, - cl_bool normalized_coords, - cl_addressing_mode addressing_mode, - cl_filter_mode filter_mode, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; - -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int CL_API_CALL -clEnqueueTask(cl_command_queue command_queue, - cl_kernel kernel, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; - -#ifdef __cplusplus -} -#endif - -#endif /* __OPENCL_CL_H */ diff --git a/src/CL/cl_d3d10.h b/src/CL/cl_d3d10.h deleted file mode 100644 index cda5469..0000000 --- a/src/CL/cl_d3d10.h +++ /dev/null @@ -1,117 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- ******************************************************************************/ - -#ifndef __OPENCL_CL_D3D10_H -#define __OPENCL_CL_D3D10_H - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/****************************************************************************** - * cl_khr_d3d10_sharing */ -#define cl_khr_d3d10_sharing 1 - -typedef cl_uint cl_d3d10_device_source_khr; -typedef cl_uint cl_d3d10_device_set_khr; - -/******************************************************************************/ - -/* Error Codes */ -#define CL_INVALID_D3D10_DEVICE_KHR -1002 -#define CL_INVALID_D3D10_RESOURCE_KHR -1003 -#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004 -#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005 - -/* cl_d3d10_device_source_nv */ -#define CL_D3D10_DEVICE_KHR 0x4010 -#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011 - -/* cl_d3d10_device_set_nv */ -#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012 -#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013 - -/* cl_context_info */ -#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014 -#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C - -/* cl_mem_info */ -#define CL_MEM_D3D10_RESOURCE_KHR 0x4015 - -/* cl_image_info */ -#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016 - -/* cl_command_type */ -#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017 -#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018 - -/******************************************************************************/ - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)( - cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)( - cl_context context, - cl_mem_flags flags, - ID3D10Buffer * resource, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef 
CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)( - cl_context context, - cl_mem_flags flags, - ID3D10Texture2D * resource, - UINT subresource, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)( - cl_context context, - cl_mem_flags flags, - ID3D10Texture3D * resource, - UINT subresource, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef __cplusplus -} -#endif - -#endif /* __OPENCL_CL_D3D10_H */ - diff --git a/src/CL/cl_d3d11.h b/src/CL/cl_d3d11.h deleted file mode 100644 index 6b7e2e9..0000000 --- a/src/CL/cl_d3d11.h +++ /dev/null @@ -1,117 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- ******************************************************************************/ - -#ifndef __OPENCL_CL_D3D11_H -#define __OPENCL_CL_D3D11_H - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/****************************************************************************** - * cl_khr_d3d11_sharing */ -#define cl_khr_d3d11_sharing 1 - -typedef cl_uint cl_d3d11_device_source_khr; -typedef cl_uint cl_d3d11_device_set_khr; - -/******************************************************************************/ - -/* Error Codes */ -#define CL_INVALID_D3D11_DEVICE_KHR -1006 -#define CL_INVALID_D3D11_RESOURCE_KHR -1007 -#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008 -#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009 - -/* cl_d3d11_device_source */ -#define CL_D3D11_DEVICE_KHR 0x4019 -#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A - -/* cl_d3d11_device_set */ -#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B -#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C - -/* cl_context_info */ -#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D -#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D - -/* cl_mem_info */ -#define CL_MEM_D3D11_RESOURCE_KHR 0x401E - -/* cl_image_info */ -#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F - -/* cl_command_type */ -#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020 -#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021 - -/******************************************************************************/ - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)( - cl_platform_id platform, - cl_d3d11_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d11_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)( - cl_context context, - cl_mem_flags flags, - ID3D11Buffer * resource, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -typedef 
CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)( - cl_context context, - cl_mem_flags flags, - ID3D11Texture2D * resource, - UINT subresource, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)( - cl_context context, - cl_mem_flags flags, - ID3D11Texture3D * resource, - UINT subresource, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_2; - -#ifdef __cplusplus -} -#endif - -#endif /* __OPENCL_CL_D3D11_H */ - diff --git a/src/CL/cl_dx9_media_sharing.h b/src/CL/cl_dx9_media_sharing.h deleted file mode 100644 index 0489370..0000000 --- a/src/CL/cl_dx9_media_sharing.h +++ /dev/null @@ -1,118 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- ******************************************************************************/ - -#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H -#define __OPENCL_CL_DX9_MEDIA_SHARING_H - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/******************************************************************************/ -/* cl_khr_dx9_media_sharing */ -#define cl_khr_dx9_media_sharing 1 - -typedef cl_uint cl_dx9_media_adapter_type_khr; -typedef cl_uint cl_dx9_media_adapter_set_khr; - -#if defined(_WIN32) -#include -typedef struct _cl_dx9_surface_info_khr -{ - IDirect3DSurface9 *resource; - HANDLE shared_handle; -} cl_dx9_surface_info_khr; -#endif - - -/******************************************************************************/ - -/* Error Codes */ -#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010 -#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011 -#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012 -#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013 - -/* cl_media_adapter_type_khr */ -#define CL_ADAPTER_D3D9_KHR 0x2020 -#define CL_ADAPTER_D3D9EX_KHR 0x2021 -#define CL_ADAPTER_DXVA_KHR 0x2022 - -/* cl_media_adapter_set_khr */ -#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023 -#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024 - -/* cl_context_info */ -#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025 -#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026 -#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027 - -/* cl_mem_info */ -#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028 -#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029 - -/* cl_image_info */ -#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A - -/* cl_command_type */ -#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B -#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C - -/******************************************************************************/ - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)( - cl_platform_id platform, - cl_uint num_media_adapters, - 
cl_dx9_media_adapter_type_khr * media_adapter_type, - void * media_adapters, - cl_dx9_media_adapter_set_khr media_adapter_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)( - cl_context context, - cl_mem_flags flags, - cl_dx9_media_adapter_type_khr adapter_type, - void * surface_info, - cl_uint plane, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_2; - -#ifdef __cplusplus -} -#endif - -#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */ - diff --git a/src/CL/cl_dx9_media_sharing_intel.h b/src/CL/cl_dx9_media_sharing_intel.h deleted file mode 100644 index 4525a17..0000000 --- a/src/CL/cl_dx9_media_sharing_intel.h +++ /dev/null @@ -1,170 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - ******************************************************************************/ -/*****************************************************************************\ - -Copyright (c) 2013-2019 Intel Corporation All Rights Reserved. - -THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE -MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -File Name: cl_dx9_media_sharing_intel.h - -Abstract: - -Notes: - -\*****************************************************************************/ - -#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H -#define __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H - -#include -#include -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/*************************************** -* cl_intel_dx9_media_sharing extension * -****************************************/ - -#define cl_intel_dx9_media_sharing 1 - -typedef cl_uint cl_dx9_device_source_intel; -typedef cl_uint cl_dx9_device_set_intel; - -/* error codes */ -#define CL_INVALID_DX9_DEVICE_INTEL -1010 -#define CL_INVALID_DX9_RESOURCE_INTEL -1011 -#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012 -#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013 - -/* cl_dx9_device_source_intel */ -#define CL_D3D9_DEVICE_INTEL 0x4022 -#define CL_D3D9EX_DEVICE_INTEL 0x4070 -#define CL_DXVA_DEVICE_INTEL 0x4071 - -/* cl_dx9_device_set_intel */ -#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024 -#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025 - -/* cl_context_info */ -#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026 -#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072 -#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073 - -/* cl_mem_info */ -#define CL_MEM_DX9_RESOURCE_INTEL 0x4027 -#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074 - -/* cl_image_info */ -#define CL_IMAGE_DX9_PLANE_INTEL 0x4075 - -/* cl_command_type */ -#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A -#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B -/******************************************************************************/ - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetDeviceIDsFromDX9INTEL( - cl_platform_id platform, - cl_dx9_device_source_intel dx9_device_source, - void* dx9_object, - cl_dx9_device_set_intel dx9_device_set, - cl_uint num_entries, - cl_device_id* devices, - cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_int 
(CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)( - cl_platform_id platform, - cl_dx9_device_source_intel dx9_device_source, - void* dx9_object, - cl_dx9_device_set_intel dx9_device_set, - cl_uint num_entries, - cl_device_id* devices, - cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1; - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateFromDX9MediaSurfaceINTEL( - cl_context context, - cl_mem_flags flags, - IDirect3DSurface9* resource, - HANDLE sharedHandle, - UINT plane, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)( - cl_context context, - cl_mem_flags flags, - IDirect3DSurface9* resource, - HANDLE sharedHandle, - UINT plane, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueAcquireDX9ObjectsINTEL( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueReleaseDX9ObjectsINTEL( - cl_command_queue command_queue, - cl_uint num_objects, - cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)( - cl_command_queue command_queue, - cl_uint num_objects, - cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; - -#ifdef __cplusplus -} -#endif - -#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H */ - diff --git 
a/src/CL/cl_egl.h b/src/CL/cl_egl.h deleted file mode 100644 index c8bde80..0000000 --- a/src/CL/cl_egl.h +++ /dev/null @@ -1,120 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - ******************************************************************************/ - -#ifndef __OPENCL_CL_EGL_H -#define __OPENCL_CL_EGL_H - -#include - -#ifdef __cplusplus -extern "C" { -#endif - - -/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */ -#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F -#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D -#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E - -/* Error type for clCreateFromEGLImageKHR */ -#define CL_INVALID_EGL_OBJECT_KHR -1093 -#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092 - -/* CLeglImageKHR is an opaque handle to an EGLImage */ -typedef void* CLeglImageKHR; - -/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */ -typedef void* CLeglDisplayKHR; - -/* CLeglSyncKHR is an opaque handle to an EGLSync object */ -typedef void* CLeglSyncKHR; - -/* properties passed to clCreateFromEGLImageKHR */ -typedef intptr_t cl_egl_image_properties_khr; - - -#define cl_khr_egl_image 1 - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateFromEGLImageKHR(cl_context context, - CLeglDisplayKHR egldisplay, - CLeglImageKHR eglimage, - cl_mem_flags flags, - const 
cl_egl_image_properties_khr * properties, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)( - cl_context context, - CLeglDisplayKHR egldisplay, - CLeglImageKHR eglimage, - cl_mem_flags flags, - const cl_egl_image_properties_khr * properties, - cl_int * errcode_ret); - - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event); - - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event); - - -#define cl_khr_egl_event 1 - -extern CL_API_ENTRY cl_event CL_API_CALL -clCreateEventFromEGLSyncKHR(cl_context context, - CLeglSyncKHR sync, - CLeglDisplayKHR display, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)( - cl_context context, - CLeglSyncKHR sync, - CLeglDisplayKHR display, - cl_int * errcode_ret); - -#ifdef __cplusplus -} -#endif - -#endif /* __OPENCL_CL_EGL_H */ diff --git a/src/CL/cl_ext.h b/src/CL/cl_ext.h deleted file mode 100644 index cd86843..0000000 --- 
a/src/CL/cl_ext.h +++ /dev/null @@ -1,841 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - ******************************************************************************/ - -/* cl_ext.h contains OpenCL extensions which don't have external */ -/* (OpenGL, D3D) dependencies. */ - -#ifndef __CL_EXT_H -#define __CL_EXT_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -/* cl_khr_fp64 extension - no extension #define since it has no functions */ -/* CL_DEVICE_DOUBLE_FP_CONFIG is defined in CL.h for OpenCL >= 120 */ - -#if CL_TARGET_OPENCL_VERSION <= 110 -#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 -#endif - -/* cl_khr_fp16 extension - no extension #define since it has no functions */ -#define CL_DEVICE_HALF_FP_CONFIG 0x1033 - -/* Memory object destruction - * - * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR - * - * Registers a user callback function that will be called when the memory object is deleted and its resources - * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback - * stack associated with memobj. The registered user callback functions are called in the reverse order in - * which they were registered. 
The user callback functions are called and then the memory object is deleted - * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be - * notified when the memory referenced by host_ptr, specified when the memory object is created and used as - * the storage bits for the memory object, can be reused or freed. - * - * The application may not call CL api's with the cl_mem object passed to the pfn_notify. - * - * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) - * before using. - */ -#define cl_APPLE_SetMemObjectDestructor 1 -cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem memobj, - void (* pfn_notify)(cl_mem memobj, void * user_data), - void * user_data) CL_EXT_SUFFIX__VERSION_1_0; - - -/* Context Logging Functions - * - * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext(). - * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) - * before using. 
- * - * clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger - */ -#define cl_APPLE_ContextLoggingFunctions 1 -extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * errstr, - const void * private_info, - size_t cb, - void * user_data) CL_EXT_SUFFIX__VERSION_1_0; - -/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */ -extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * errstr, - const void * private_info, - size_t cb, - void * user_data) CL_EXT_SUFFIX__VERSION_1_0; - -/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */ -extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * errstr, - const void * private_info, - size_t cb, - void * user_data) CL_EXT_SUFFIX__VERSION_1_0; - - -/************************ -* cl_khr_icd extension * -************************/ -#define cl_khr_icd 1 - -/* cl_platform_info */ -#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 - -/* Additional Error Codes */ -#define CL_PLATFORM_NOT_FOUND_KHR -1001 - -extern CL_API_ENTRY cl_int CL_API_CALL -clIcdGetPlatformIDsKHR(cl_uint num_entries, - cl_platform_id * platforms, - cl_uint * num_platforms); - -typedef CL_API_ENTRY cl_int -(CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(cl_uint num_entries, - cl_platform_id * platforms, - cl_uint * num_platforms); - - -/******************************* - * cl_khr_il_program extension * - *******************************/ -#define cl_khr_il_program 1 - -/* New property to clGetDeviceInfo for retrieving supported intermediate - * languages - */ -#define CL_DEVICE_IL_VERSION_KHR 0x105B - -/* New property to clGetProgramInfo for retrieving for retrieving the IL of a - * program - */ -#define CL_PROGRAM_IL_KHR 0x1169 - -extern CL_API_ENTRY cl_program CL_API_CALL -clCreateProgramWithILKHR(cl_context context, - const void * il, - size_t length, - cl_int * errcode_ret); - -typedef CL_API_ENTRY cl_program -(CL_API_CALL *clCreateProgramWithILKHR_fn)(cl_context 
context, - const void * il, - size_t length, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; - -/* Extension: cl_khr_image2d_from_buffer - * - * This extension allows a 2D image to be created from a cl_mem buffer without - * a copy. The type associated with a 2D image created from a buffer in an - * OpenCL program is image2d_t. Both the sampler and sampler-less read_image - * built-in functions are supported for 2D images and 2D images created from - * a buffer. Similarly, the write_image built-ins are also supported for 2D - * images created from a buffer. - * - * When the 2D image from buffer is created, the client must specify the - * width, height, image format (i.e. channel order and channel data type) - * and optionally the row pitch. - * - * The pitch specified must be a multiple of - * CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR pixels. - * The base address of the buffer must be aligned to - * CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR pixels. - */ - -#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR 0x104A -#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR 0x104B - - -/************************************** - * cl_khr_initialize_memory extension * - **************************************/ - -#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030 - - -/************************************** - * cl_khr_terminate_context extension * - **************************************/ - -#define CL_CONTEXT_TERMINATED_KHR -1121 - -#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031 -#define CL_CONTEXT_TERMINATE_KHR 0x2032 - -#define cl_khr_terminate_context 1 -extern CL_API_ENTRY cl_int CL_API_CALL -clTerminateContextKHR(cl_context context) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int -(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_EXT_SUFFIX__VERSION_1_2; - - -/* - * Extension: cl_khr_spir - * - * This extension adds support to create an OpenCL program object from a - * Standard Portable Intermediate Representation (SPIR) instance - */ - -#define 
CL_DEVICE_SPIR_VERSIONS 0x40E0 -#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1 - - -/***************************************** - * cl_khr_create_command_queue extension * - *****************************************/ -#define cl_khr_create_command_queue 1 - -typedef cl_bitfield cl_queue_properties_khr; - -extern CL_API_ENTRY cl_command_queue CL_API_CALL -clCreateCommandQueueWithPropertiesKHR(cl_context context, - cl_device_id device, - const cl_queue_properties_khr* properties, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_command_queue -(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(cl_context context, - cl_device_id device, - const cl_queue_properties_khr* properties, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; - - -/****************************************** -* cl_nv_device_attribute_query extension * -******************************************/ - -/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ -#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 -#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 -#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 -#define CL_DEVICE_WARP_SIZE_NV 0x4003 -#define CL_DEVICE_GPU_OVERLAP_NV 0x4004 -#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 -#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 - - -/********************************* -* cl_amd_device_attribute_query * -*********************************/ - -#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 -#define CL_DEVICE_TOPOLOGY_AMD 0x4037 -#define CL_DEVICE_BOARD_NAME_AMD 0x4038 -#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039 -#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040 -#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041 -#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042 -#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 -#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044 -#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045 -#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046 
-#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047 -#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048 -#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049 -#define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A -#define CL_DEVICE_GFXIP_MINOR_AMD 0x404B -#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C -#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD 0x4030 -#define CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD 0x4031 -#define CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD 0x4033 -#define CL_DEVICE_PCIE_ID_AMD 0x4034 - - -/********************************* -* cl_arm_printf extension -*********************************/ - -#define CL_PRINTF_CALLBACK_ARM 0x40B0 -#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1 - - -/*********************************** -* cl_ext_device_fission extension -***********************************/ -#define cl_ext_device_fission 1 - -extern CL_API_ENTRY cl_int CL_API_CALL -clReleaseDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_int -(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; - -extern CL_API_ENTRY cl_int CL_API_CALL -clRetainDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_int -(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; - -typedef cl_ulong cl_device_partition_property_ext; -extern CL_API_ENTRY cl_int CL_API_CALL -clCreateSubDevicesEXT(cl_device_id in_device, - const cl_device_partition_property_ext * properties, - cl_uint num_entries, - cl_device_id * out_devices, - cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_int -(CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id in_device, - const cl_device_partition_property_ext * properties, - cl_uint num_entries, - cl_device_id * out_devices, - cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1; - -/* cl_device_partition_property_ext */ -#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 -#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 
-#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 -#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 - -/* clDeviceGetInfo selectors */ -#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 -#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055 -#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056 -#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057 -#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058 - -/* error codes */ -#define CL_DEVICE_PARTITION_FAILED_EXT -1057 -#define CL_INVALID_PARTITION_COUNT_EXT -1058 -#define CL_INVALID_PARTITION_NAME_EXT -1059 - -/* CL_AFFINITY_DOMAINs */ -#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1 -#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2 -#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3 -#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4 -#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10 -#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100 - -/* cl_device_partition_property_ext list terminators */ -#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0) -#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0) -#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1) - - -/*********************************** - * cl_ext_migrate_memobject extension definitions - ***********************************/ -#define cl_ext_migrate_memobject 1 - -typedef cl_bitfield cl_mem_migration_flags_ext; - -#define CL_MIGRATE_MEM_OBJECT_HOST_EXT 0x1 - -#define CL_COMMAND_MIGRATE_MEM_OBJECT_EXT 0x4040 - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMigrateMemObjectEXT(cl_command_queue command_queue, - cl_uint num_mem_objects, - const cl_mem * mem_objects, - cl_mem_migration_flags_ext flags, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event); - -typedef CL_API_ENTRY cl_int -(CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(cl_command_queue command_queue, - cl_uint num_mem_objects, - const cl_mem * mem_objects, - cl_mem_migration_flags_ext flags, - cl_uint num_events_in_wait_list, - const 
cl_event * event_wait_list, - cl_event * event); - - -/********************************* -* cl_qcom_ext_host_ptr extension -*********************************/ -#define cl_qcom_ext_host_ptr 1 - -#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29) - -#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0 -#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1 -#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2 -#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3 -#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4 -#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5 -#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6 -#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7 - -typedef cl_uint cl_image_pitch_info_qcom; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetDeviceImageInfoQCOM(cl_device_id device, - size_t image_width, - size_t image_height, - const cl_image_format *image_format, - cl_image_pitch_info_qcom param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret); - -typedef struct _cl_mem_ext_host_ptr -{ - /* Type of external memory allocation. */ - /* Legal values will be defined in layered extensions. */ - cl_uint allocation_type; - - /* Host cache policy for this external memory allocation. */ - cl_uint host_cache_policy; - -} cl_mem_ext_host_ptr; - - -/******************************************* -* cl_qcom_ext_host_ptr_iocoherent extension -********************************************/ - -/* Cache policy specifying io-coherence */ -#define CL_MEM_HOST_IOCOHERENT_QCOM 0x40A9 - - -/********************************* -* cl_qcom_ion_host_ptr extension -*********************************/ - -#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8 - -typedef struct _cl_mem_ion_host_ptr -{ - /* Type of external memory allocation. */ - /* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. 
*/ - cl_mem_ext_host_ptr ext_host_ptr; - - /* ION file descriptor */ - int ion_filedesc; - - /* Host pointer to the ION allocated memory */ - void* ion_hostptr; - -} cl_mem_ion_host_ptr; - - -/********************************* -* cl_qcom_android_native_buffer_host_ptr extension -*********************************/ - -#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6 - -typedef struct _cl_mem_android_native_buffer_host_ptr -{ - /* Type of external memory allocation. */ - /* Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers. */ - cl_mem_ext_host_ptr ext_host_ptr; - - /* Virtual pointer to the android native buffer */ - void* anb_ptr; - -} cl_mem_android_native_buffer_host_ptr; - - -/****************************************** - * cl_img_yuv_image extension * - ******************************************/ - -/* Image formats used in clCreateImage */ -#define CL_NV21_IMG 0x40D0 -#define CL_YV12_IMG 0x40D1 - - -/****************************************** - * cl_img_cached_allocations extension * - ******************************************/ - -/* Flag values used by clCreateBuffer */ -#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG (1 << 26) -#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG (1 << 27) - - -/****************************************** - * cl_img_use_gralloc_ptr extension * - ******************************************/ -#define cl_img_use_gralloc_ptr 1 - -/* Flag values used by clCreateBuffer */ -#define CL_MEM_USE_GRALLOC_PTR_IMG (1 << 28) - -/* To be used by clGetEventInfo: */ -#define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG 0x40D2 -#define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG 0x40D3 - -/* Error code from clEnqueueReleaseGrallocObjectsIMG */ -#define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG 0x40D4 - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueAcquireGrallocObjectsIMG(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * 
event) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueReleaseGrallocObjectsIMG(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; - - -/********************************* -* cl_khr_subgroups extension -*********************************/ -#define cl_khr_subgroups 1 - -#if !defined(CL_VERSION_2_1) -/* For OpenCL 2.1 and newer, cl_kernel_sub_group_info is declared in CL.h. - In hindsight, there should have been a khr suffix on this type for - the extension, but keeping it un-suffixed to maintain backwards - compatibility. */ -typedef cl_uint cl_kernel_sub_group_info; -#endif - -/* cl_kernel_sub_group_info */ -#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033 -#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034 - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetKernelSubGroupInfoKHR(cl_kernel in_kernel, - cl_device_id in_device, - cl_kernel_sub_group_info param_name, - size_t input_value_size, - const void * input_value, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED; - -typedef CL_API_ENTRY cl_int -(CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel in_kernel, - cl_device_id in_device, - cl_kernel_sub_group_info param_name, - size_t input_value_size, - const void * input_value, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED; - - -/********************************* -* cl_khr_mipmap_image extension -*********************************/ - -/* cl_sampler_properties */ -#define CL_SAMPLER_MIP_FILTER_MODE_KHR 0x1155 -#define CL_SAMPLER_LOD_MIN_KHR 0x1156 -#define CL_SAMPLER_LOD_MAX_KHR 0x1157 - - -/********************************* -* cl_khr_priority_hints extension -*********************************/ -/* This extension define is for 
backwards compatibility. - It shouldn't be required since this extension has no new functions. */ -#define cl_khr_priority_hints 1 - -typedef cl_uint cl_queue_priority_khr; - -/* cl_command_queue_properties */ -#define CL_QUEUE_PRIORITY_KHR 0x1096 - -/* cl_queue_priority_khr */ -#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0) -#define CL_QUEUE_PRIORITY_MED_KHR (1<<1) -#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2) - - -/********************************* -* cl_khr_throttle_hints extension -*********************************/ -/* This extension define is for backwards compatibility. - It shouldn't be required since this extension has no new functions. */ -#define cl_khr_throttle_hints 1 - -typedef cl_uint cl_queue_throttle_khr; - -/* cl_command_queue_properties */ -#define CL_QUEUE_THROTTLE_KHR 0x1097 - -/* cl_queue_throttle_khr */ -#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0) -#define CL_QUEUE_THROTTLE_MED_KHR (1<<1) -#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2) - - -/********************************* -* cl_khr_subgroup_named_barrier -*********************************/ -/* This extension define is for backwards compatibility. - It shouldn't be required since this extension has no new functions. 
*/ -#define cl_khr_subgroup_named_barrier 1 - -/* cl_device_info */ -#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR 0x2035 - - -/********************************* -* cl_khr_extended_versioning -*********************************/ - -#define cl_khr_extended_versioning 1 - -#define CL_VERSION_MAJOR_BITS_KHR (10) -#define CL_VERSION_MINOR_BITS_KHR (10) -#define CL_VERSION_PATCH_BITS_KHR (12) - -#define CL_VERSION_MAJOR_MASK_KHR ((1 << CL_VERSION_MAJOR_BITS_KHR) - 1) -#define CL_VERSION_MINOR_MASK_KHR ((1 << CL_VERSION_MINOR_BITS_KHR) - 1) -#define CL_VERSION_PATCH_MASK_KHR ((1 << CL_VERSION_PATCH_BITS_KHR) - 1) - -#define CL_VERSION_MAJOR_KHR(version) ((version) >> (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) -#define CL_VERSION_MINOR_KHR(version) (((version) >> CL_VERSION_PATCH_BITS_KHR) & CL_VERSION_MINOR_MASK_KHR) -#define CL_VERSION_PATCH_KHR(version) ((version) & CL_VERSION_PATCH_MASK_KHR) - -#define CL_MAKE_VERSION_KHR(major, minor, patch) \ - ((((major) & CL_VERSION_MAJOR_MASK_KHR) << (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) | \ - (((minor) & CL_VERSION_MINOR_MASK_KHR) << CL_VERSION_PATCH_BITS_KHR) | \ - ((patch) & CL_VERSION_PATCH_MASK_KHR)) - -typedef cl_uint cl_version_khr; - -#define CL_NAME_VERSION_MAX_NAME_SIZE_KHR 64 - -typedef struct _cl_name_version_khr -{ - cl_version_khr version; - char name[CL_NAME_VERSION_MAX_NAME_SIZE_KHR]; -} cl_name_version_khr; - -/* cl_platform_info */ -#define CL_PLATFORM_NUMERIC_VERSION_KHR 0x0906 -#define CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR 0x0907 - -/* cl_device_info */ -#define CL_DEVICE_NUMERIC_VERSION_KHR 0x105E -#define CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR 0x105F -#define CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR 0x1060 -#define CL_DEVICE_ILS_WITH_VERSION_KHR 0x1061 -#define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR 0x1062 - - -/********************************* -* cl_khr_device_uuid extension -*********************************/ -#define cl_khr_device_uuid 1 - -#define CL_UUID_SIZE_KHR 16 
-#define CL_LUID_SIZE_KHR 8 - -#define CL_DEVICE_UUID_KHR 0x106A -#define CL_DRIVER_UUID_KHR 0x106B -#define CL_DEVICE_LUID_VALID_KHR 0x106C -#define CL_DEVICE_LUID_KHR 0x106D -#define CL_DEVICE_NODE_MASK_KHR 0x106E - - -/********************************** - * cl_arm_import_memory extension * - **********************************/ -#define cl_arm_import_memory 1 - -typedef intptr_t cl_import_properties_arm; - -/* Default and valid proporties name for cl_arm_import_memory */ -#define CL_IMPORT_TYPE_ARM 0x40B2 - -/* Host process memory type default value for CL_IMPORT_TYPE_ARM property */ -#define CL_IMPORT_TYPE_HOST_ARM 0x40B3 - -/* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */ -#define CL_IMPORT_TYPE_DMA_BUF_ARM 0x40B4 - -/* Protected memory property */ -#define CL_IMPORT_TYPE_PROTECTED_ARM 0x40B5 - -/* Android hardware buffer type value for CL_IMPORT_TYPE_ARM property */ -#define CL_IMPORT_TYPE_ANDROID_HARDWARE_BUFFER_ARM 0x41E2 - -/* Data consistency with host property */ -#define CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM 0x41E3 - -/* Import memory size value to indicate a size for the whole buffer */ -#define CL_IMPORT_MEMORY_WHOLE_ALLOCATION_ARM SIZE_MAX - -/* This extension adds a new function that allows for direct memory import into - * OpenCL via the clImportMemoryARM function. - * - * Memory imported through this interface will be mapped into the device's page - * tables directly, providing zero copy access. It will never fall back to copy - * operations and aliased buffers. - * - * Types of memory supported for import are specified as additional extension - * strings. - * - * This extension produces cl_mem allocations which are compatible with all other - * users of cl_mem in the standard API. - * - * This extension maps pages with the same properties as the normal buffer creation - * function clCreateBuffer. 
- */ -extern CL_API_ENTRY cl_mem CL_API_CALL -clImportMemoryARM( cl_context context, - cl_mem_flags flags, - const cl_import_properties_arm *properties, - void *memory, - size_t size, - cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_0; - - -/****************************************** - * cl_arm_shared_virtual_memory extension * - ******************************************/ -#define cl_arm_shared_virtual_memory 1 - -/* Used by clGetDeviceInfo */ -#define CL_DEVICE_SVM_CAPABILITIES_ARM 0x40B6 - -/* Used by clGetMemObjectInfo */ -#define CL_MEM_USES_SVM_POINTER_ARM 0x40B7 - -/* Used by clSetKernelExecInfoARM: */ -#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM 0x40B8 -#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM 0x40B9 - -/* To be used by clGetEventInfo: */ -#define CL_COMMAND_SVM_FREE_ARM 0x40BA -#define CL_COMMAND_SVM_MEMCPY_ARM 0x40BB -#define CL_COMMAND_SVM_MEMFILL_ARM 0x40BC -#define CL_COMMAND_SVM_MAP_ARM 0x40BD -#define CL_COMMAND_SVM_UNMAP_ARM 0x40BE - -/* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. 
*/ -#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM (1 << 0) -#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM (1 << 1) -#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM (1 << 2) -#define CL_DEVICE_SVM_ATOMICS_ARM (1 << 3) - -/* Flag values used by clSVMAllocARM: */ -#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM (1 << 10) -#define CL_MEM_SVM_ATOMICS_ARM (1 << 11) - -typedef cl_bitfield cl_svm_mem_flags_arm; -typedef cl_uint cl_kernel_exec_info_arm; -typedef cl_bitfield cl_device_svm_capabilities_arm; - -extern CL_API_ENTRY void * CL_API_CALL -clSVMAllocARM(cl_context context, - cl_svm_mem_flags_arm flags, - size_t size, - cl_uint alignment) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY void CL_API_CALL -clSVMFreeARM(cl_context context, - void * svm_pointer) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueSVMFreeARM(cl_command_queue command_queue, - cl_uint num_svm_pointers, - void * svm_pointers[], - void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue, - cl_uint num_svm_pointers, - void * svm_pointers[], - void * user_data), - void * user_data, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueSVMMemcpyARM(cl_command_queue command_queue, - cl_bool blocking_copy, - void * dst_ptr, - const void * src_ptr, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueSVMMemFillARM(cl_command_queue command_queue, - void * svm_ptr, - const void * pattern, - size_t pattern_size, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueSVMMapARM(cl_command_queue command_queue, - cl_bool blocking_map, - cl_map_flags flags, - void * svm_ptr, - size_t size, - cl_uint 
num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueSVMUnmapARM(cl_command_queue command_queue, - void * svm_ptr, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clSetKernelArgSVMPointerARM(cl_kernel kernel, - cl_uint arg_index, - const void * arg_value) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clSetKernelExecInfoARM(cl_kernel kernel, - cl_kernel_exec_info_arm param_name, - size_t param_value_size, - const void * param_value) CL_EXT_SUFFIX__VERSION_1_2; - -/******************************** - * cl_arm_get_core_id extension * - ********************************/ - -#ifdef CL_VERSION_1_2 - -#define cl_arm_get_core_id 1 - -/* Device info property for bitfield of cores present */ -#define CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM 0x40BF - -#endif /* CL_VERSION_1_2 */ - -/********************************* -* cl_arm_job_slot_selection -*********************************/ - -#define cl_arm_job_slot_selection 1 - -/* cl_device_info */ -#define CL_DEVICE_JOB_SLOTS_ARM 0x41E0 - -/* cl_command_queue_properties */ -#define CL_QUEUE_JOB_SLOT_ARM 0x41E1 - -#ifdef __cplusplus -} -#endif - - -#endif /* __CL_EXT_H */ diff --git a/src/CL/cl_ext_intel.h b/src/CL/cl_ext_intel.h deleted file mode 100644 index f044684..0000000 --- a/src/CL/cl_ext_intel.h +++ /dev/null @@ -1,682 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - ******************************************************************************/ -/*****************************************************************************\ - -Copyright (c) 2013-2020 Intel Corporation All Rights Reserved. - -THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE -MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -File Name: cl_ext_intel.h - -Abstract: - -Notes: - -\*****************************************************************************/ - -#ifndef __CL_EXT_INTEL_H -#define __CL_EXT_INTEL_H - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/*************************************** -* cl_intel_thread_local_exec extension * -****************************************/ - -#define cl_intel_thread_local_exec 1 - -#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31) - -/*********************************************** -* cl_intel_device_partition_by_names extension * -************************************************/ - -#define cl_intel_device_partition_by_names 1 - -#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052 -#define CL_PARTITION_BY_NAMES_LIST_END_INTEL -1 - -/************************************************ -* cl_intel_accelerator extension * -* cl_intel_motion_estimation extension * -* cl_intel_advanced_motion_estimation extension * -*************************************************/ - -#define cl_intel_accelerator 1 -#define cl_intel_motion_estimation 1 -#define cl_intel_advanced_motion_estimation 1 - -typedef struct _cl_accelerator_intel* cl_accelerator_intel; -typedef cl_uint cl_accelerator_type_intel; -typedef cl_uint cl_accelerator_info_intel; - -typedef struct _cl_motion_estimation_desc_intel { - cl_uint mb_block_type; - cl_uint subpixel_mode; - cl_uint sad_adjust_mode; - cl_uint search_path_type; -} cl_motion_estimation_desc_intel; - -/* error codes */ -#define CL_INVALID_ACCELERATOR_INTEL -1094 -#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095 -#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096 -#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097 - -/* cl_accelerator_type_intel */ -#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0 - -/* cl_accelerator_info_intel */ -#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090 -#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091 -#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092 
-#define CL_ACCELERATOR_TYPE_INTEL 0x4093 - -/* cl_motion_detect_desc_intel flags */ -#define CL_ME_MB_TYPE_16x16_INTEL 0x0 -#define CL_ME_MB_TYPE_8x8_INTEL 0x1 -#define CL_ME_MB_TYPE_4x4_INTEL 0x2 - -#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 -#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 -#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2 - -#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 -#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1 - -#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0 -#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1 -#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5 - -#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL 0x0 -#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1 -#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2 -#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL 0x4 - -#define CL_ME_FORWARD_INPUT_MODE_INTEL 0x1 -#define CL_ME_BACKWARD_INPUT_MODE_INTEL 0x2 -#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL 0x3 - -#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL 16 -#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL 21 -#define CL_ME_BIDIR_WEIGHT_HALF_INTEL 32 -#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 43 -#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 48 - -#define CL_ME_COST_PENALTY_NONE_INTEL 0x0 -#define CL_ME_COST_PENALTY_LOW_INTEL 0x1 -#define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2 -#define CL_ME_COST_PENALTY_HIGH_INTEL 0x3 - -#define CL_ME_COST_PRECISION_QPEL_INTEL 0x0 -#define CL_ME_COST_PRECISION_HPEL_INTEL 0x1 -#define CL_ME_COST_PRECISION_PEL_INTEL 0x2 -#define CL_ME_COST_PRECISION_DPEL_INTEL 0x3 - -#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 -#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 -#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 -#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 - -#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 -#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 -#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 -#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 -#define 
CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 -#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 - -#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 -#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 -#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 -#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 - -/* cl_device_info */ -#define CL_DEVICE_ME_VERSION_INTEL 0x407E - -#define CL_ME_VERSION_LEGACY_INTEL 0x0 -#define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1 -#define CL_ME_VERSION_ADVANCED_VER_2_INTEL 0x2 - -extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL -clCreateAcceleratorINTEL( - cl_context context, - cl_accelerator_type_intel accelerator_type, - size_t descriptor_size, - const void* descriptor, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)( - cl_context context, - cl_accelerator_type_intel accelerator_type, - size_t descriptor_size, - const void* descriptor, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetAcceleratorInfoINTEL( - cl_accelerator_intel accelerator, - cl_accelerator_info_intel param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)( - cl_accelerator_intel accelerator, - cl_accelerator_info_intel param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clRetainAcceleratorINTEL( - cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)( - cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clReleaseAcceleratorINTEL( - cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int 
(CL_API_CALL *clReleaseAcceleratorINTEL_fn)( - cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; - -/****************************************** -* cl_intel_simultaneous_sharing extension * -*******************************************/ - -#define cl_intel_simultaneous_sharing 1 - -#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104 -#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105 - -/*********************************** -* cl_intel_egl_image_yuv extension * -************************************/ - -#define cl_intel_egl_image_yuv 1 - -#define CL_EGL_YUV_PLANE_INTEL 0x4107 - -/******************************** -* cl_intel_packed_yuv extension * -*********************************/ - -#define cl_intel_packed_yuv 1 - -#define CL_YUYV_INTEL 0x4076 -#define CL_UYVY_INTEL 0x4077 -#define CL_YVYU_INTEL 0x4078 -#define CL_VYUY_INTEL 0x4079 - -/******************************************** -* cl_intel_required_subgroup_size extension * -*********************************************/ - -#define cl_intel_required_subgroup_size 1 - -#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108 -#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109 -#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A - -/**************************************** -* cl_intel_driver_diagnostics extension * -*****************************************/ - -#define cl_intel_driver_diagnostics 1 - -typedef cl_uint cl_diagnostics_verbose_level; - -#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106 - -#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL ( 0xff ) -#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL ( 1 ) -#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL ( 1 << 1 ) -#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL ( 1 << 2 ) - -/******************************** -* cl_intel_planar_yuv extension * -*********************************/ - -#define CL_NV12_INTEL 0x410E - -#define CL_MEM_NO_ACCESS_INTEL ( 1 << 24 ) -#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ( 1 << 25 ) - -#define 
CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E -#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F - -/******************************************************* -* cl_intel_device_side_avc_motion_estimation extension * -********************************************************/ - -#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B -#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C -#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D - -#define CL_AVC_ME_VERSION_0_INTEL 0x0 /* No support. */ -#define CL_AVC_ME_VERSION_1_INTEL 0x1 /* First supported version. */ - -#define CL_AVC_ME_MAJOR_16x16_INTEL 0x0 -#define CL_AVC_ME_MAJOR_16x8_INTEL 0x1 -#define CL_AVC_ME_MAJOR_8x16_INTEL 0x2 -#define CL_AVC_ME_MAJOR_8x8_INTEL 0x3 - -#define CL_AVC_ME_MINOR_8x8_INTEL 0x0 -#define CL_AVC_ME_MINOR_8x4_INTEL 0x1 -#define CL_AVC_ME_MINOR_4x8_INTEL 0x2 -#define CL_AVC_ME_MINOR_4x4_INTEL 0x3 - -#define CL_AVC_ME_MAJOR_FORWARD_INTEL 0x0 -#define CL_AVC_ME_MAJOR_BACKWARD_INTEL 0x1 -#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2 - -#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0 -#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E -#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D -#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B -#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77 -#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F -#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F -#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F - -#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0 -#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1 -#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2 -#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3 -#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4 -#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5 -#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6 -#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7 -#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8 -#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL 0x9 -#define 
CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL 0x2 -#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL 0xa - -#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 -#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2 - -#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 -#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 -#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3 - -#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0 -#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1 -#define CL_AVC_ME_COST_PRECISION_PEL_INTEL 0x2 -#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3 - -#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10 -#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15 -#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20 -#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B -#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30 - -#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0 -#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2 -#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4 -#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8 - -#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0 -#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000 - -#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL ( 0x1 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL ( 0x2 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL ( 0x3 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL ( 0x55 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL ( 0xAA << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL ( 0xFF << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL ( 0x1 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL ( 0x2 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL ( 0x1 << 26 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL ( 0x2 << 26 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL ( 0x1 << 28 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL ( 0x2 << 28 ) -#define 
CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL ( 0x1 << 30 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL ( 0x2 << 30 ) - -#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00 -#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80 - -#define CL_AVC_ME_INTRA_16x16_INTEL 0x0 -#define CL_AVC_ME_INTRA_8x8_INTEL 0x1 -#define CL_AVC_ME_INTRA_4x4_INTEL 0x2 - -#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6 -#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5 -#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3 - -#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60 -#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10 -#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8 -#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4 - -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 -#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 -#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 -#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 -#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 - -#define CL_AVC_ME_FRAME_FORWARD_INTEL 0x1 -#define CL_AVC_ME_FRAME_BACKWARD_INTEL 0x2 -#define CL_AVC_ME_FRAME_DUAL_INTEL 0x3 - -#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0 -#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1 -#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2 - -#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0 -#define 
CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1 - -/******************************************* -* cl_intel_unified_shared_memory extension * -********************************************/ - -/* These APIs are in sync with Revision O of the cl_intel_unified_shared_memory spec! */ - -#define cl_intel_unified_shared_memory 1 - -/* cl_device_info */ -#define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL 0x4190 -#define CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL 0x4191 -#define CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4192 -#define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4193 -#define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL 0x4194 - -typedef cl_bitfield cl_device_unified_shared_memory_capabilities_intel; - -/* cl_device_unified_shared_memory_capabilities_intel - bitfield */ -#define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL (1 << 0) -#define CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL (1 << 1) -#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL (1 << 2) -#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL (1 << 3) - -typedef cl_bitfield cl_mem_properties_intel; - -/* cl_mem_properties_intel */ -#define CL_MEM_ALLOC_FLAGS_INTEL 0x4195 - -typedef cl_bitfield cl_mem_alloc_flags_intel; - -/* cl_mem_alloc_flags_intel - bitfield */ -#define CL_MEM_ALLOC_WRITE_COMBINED_INTEL (1 << 0) - -typedef cl_uint cl_mem_info_intel; - -/* cl_mem_alloc_info_intel */ -#define CL_MEM_ALLOC_TYPE_INTEL 0x419A -#define CL_MEM_ALLOC_BASE_PTR_INTEL 0x419B -#define CL_MEM_ALLOC_SIZE_INTEL 0x419C -#define CL_MEM_ALLOC_DEVICE_INTEL 0x419D -/* Enum values 0x419E-0x419F are reserved for future queries. 
*/ - -typedef cl_uint cl_unified_shared_memory_type_intel; - -/* cl_unified_shared_memory_type_intel */ -#define CL_MEM_TYPE_UNKNOWN_INTEL 0x4196 -#define CL_MEM_TYPE_HOST_INTEL 0x4197 -#define CL_MEM_TYPE_DEVICE_INTEL 0x4198 -#define CL_MEM_TYPE_SHARED_INTEL 0x4199 - -typedef cl_uint cl_mem_advice_intel; - -/* cl_mem_advice_intel */ -/* Enum values 0x4208-0x420F are reserved for future memory advices. */ - -/* cl_kernel_exec_info */ -#define CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL 0x4200 -#define CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL 0x4201 -#define CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL 0x4202 -#define CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL 0x4203 - -/* cl_command_type */ -#define CL_COMMAND_MEMFILL_INTEL 0x4204 -#define CL_COMMAND_MEMCPY_INTEL 0x4205 -#define CL_COMMAND_MIGRATEMEM_INTEL 0x4206 -#define CL_COMMAND_MEMADVISE_INTEL 0x4207 - -extern CL_API_ENTRY void* CL_API_CALL -clHostMemAllocINTEL( - cl_context context, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -typedef CL_API_ENTRY void* (CL_API_CALL * -clHostMemAllocINTEL_fn)( - cl_context context, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -extern CL_API_ENTRY void* CL_API_CALL -clDeviceMemAllocINTEL( - cl_context context, - cl_device_id device, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -typedef CL_API_ENTRY void* (CL_API_CALL * -clDeviceMemAllocINTEL_fn)( - cl_context context, - cl_device_id device, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -extern CL_API_ENTRY void* CL_API_CALL -clSharedMemAllocINTEL( - cl_context context, - cl_device_id device, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -typedef CL_API_ENTRY void* (CL_API_CALL * -clSharedMemAllocINTEL_fn)( - 
cl_context context, - cl_device_id device, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -extern CL_API_ENTRY cl_int CL_API_CALL -clMemFreeINTEL( - cl_context context, - void* ptr); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clMemFreeINTEL_fn)( - cl_context context, - void* ptr); - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetMemAllocInfoINTEL( - cl_context context, - const void* ptr, - cl_mem_info_intel param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clGetMemAllocInfoINTEL_fn)( - cl_context context, - const void* ptr, - cl_mem_info_intel param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret); - -extern CL_API_ENTRY cl_int CL_API_CALL -clSetKernelArgMemPointerINTEL( - cl_kernel kernel, - cl_uint arg_index, - const void* arg_value); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clSetKernelArgMemPointerINTEL_fn)( - cl_kernel kernel, - cl_uint arg_index, - const void* arg_value); - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMemsetINTEL( /* Deprecated */ - cl_command_queue command_queue, - void* dst_ptr, - cl_int value, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clEnqueueMemsetINTEL_fn)( /* Deprecated */ - cl_command_queue command_queue, - void* dst_ptr, - cl_int value, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMemFillINTEL( - cl_command_queue command_queue, - void* dst_ptr, - const void* pattern, - size_t pattern_size, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clEnqueueMemFillINTEL_fn)( - cl_command_queue command_queue, - void* dst_ptr, - 
const void* pattern, - size_t pattern_size, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMemcpyINTEL( - cl_command_queue command_queue, - cl_bool blocking, - void* dst_ptr, - const void* src_ptr, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clEnqueueMemcpyINTEL_fn)( - cl_command_queue command_queue, - cl_bool blocking, - void* dst_ptr, - const void* src_ptr, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -#ifdef CL_VERSION_1_2 - -/* Because these APIs use cl_mem_migration_flags, they require - OpenCL 1.2: */ - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMigrateMemINTEL( - cl_command_queue command_queue, - const void* ptr, - size_t size, - cl_mem_migration_flags flags, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clEnqueueMigrateMemINTEL_fn)( - cl_command_queue command_queue, - const void* ptr, - size_t size, - cl_mem_migration_flags flags, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -#endif - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMemAdviseINTEL( - cl_command_queue command_queue, - const void* ptr, - size_t size, - cl_mem_advice_intel advice, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clEnqueueMemAdviseINTEL_fn)( - cl_command_queue command_queue, - const void* ptr, - size_t size, - cl_mem_advice_intel advice, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -#ifdef __cplusplus -} -#endif - -#endif /* __CL_EXT_INTEL_H */ diff --git a/src/CL/cl_gl.h b/src/CL/cl_gl.h deleted file mode 100644 index 
b587f02..0000000 --- a/src/CL/cl_gl.h +++ /dev/null @@ -1,159 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - ******************************************************************************/ - -#ifndef __OPENCL_CL_GL_H -#define __OPENCL_CL_GL_H - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -typedef cl_uint cl_gl_object_type; -typedef cl_uint cl_gl_texture_info; -typedef cl_uint cl_gl_platform_info; -typedef struct __GLsync *cl_GLsync; - -/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */ -#define CL_GL_OBJECT_BUFFER 0x2000 -#define CL_GL_OBJECT_TEXTURE2D 0x2001 -#define CL_GL_OBJECT_TEXTURE3D 0x2002 -#define CL_GL_OBJECT_RENDERBUFFER 0x2003 -#ifdef CL_VERSION_1_2 -#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E -#define CL_GL_OBJECT_TEXTURE1D 0x200F -#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 -#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 -#endif - -/* cl_gl_texture_info */ -#define CL_GL_TEXTURE_TARGET 0x2004 -#define CL_GL_MIPMAP_LEVEL 0x2005 -#ifdef CL_VERSION_1_2 -#define CL_GL_NUM_SAMPLES 0x2012 -#endif - - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateFromGLBuffer(cl_context context, - cl_mem_flags flags, - cl_GLuint bufobj, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateFromGLTexture(cl_context 
context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texture, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -#endif - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateFromGLRenderbuffer(cl_context context, - cl_mem_flags flags, - cl_GLuint renderbuffer, - cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetGLObjectInfo(cl_mem memobj, - cl_gl_object_type * gl_object_type, - cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetGLTextureInfo(cl_mem memobj, - cl_gl_texture_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueAcquireGLObjects(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueReleaseGLObjects(cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem * mem_objects, - cl_uint num_events_in_wait_list, - const cl_event * event_wait_list, - cl_event * event) CL_API_SUFFIX__VERSION_1_0; - - -/* Deprecated OpenCL 1.1 APIs */ -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL -clCreateFromGLTexture2D(cl_context context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texture, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL -clCreateFromGLTexture3D(cl_context context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texture, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -/* cl_khr_gl_sharing extension */ - -#define cl_khr_gl_sharing 1 - -typedef cl_uint cl_gl_context_info; - -/* Additional Error Codes 
*/ -#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 - -/* cl_gl_context_info */ -#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 -#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 - -/* Additional cl_context_properties */ -#define CL_GL_CONTEXT_KHR 0x2008 -#define CL_EGL_DISPLAY_KHR 0x2009 -#define CL_GLX_DISPLAY_KHR 0x200A -#define CL_WGL_HDC_KHR 0x200B -#define CL_CGL_SHAREGROUP_KHR 0x200C - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetGLContextInfoKHR(const cl_context_properties * properties, - cl_gl_context_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( - const cl_context_properties * properties, - cl_gl_context_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret); - -#ifdef __cplusplus -} -#endif - -#endif /* __OPENCL_CL_GL_H */ diff --git a/src/CL/cl_gl_ext.h b/src/CL/cl_gl_ext.h deleted file mode 100644 index 9bb7540..0000000 --- a/src/CL/cl_gl_ext.h +++ /dev/null @@ -1,40 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- ******************************************************************************/ - -#ifndef __OPENCL_CL_GL_EXT_H -#define __OPENCL_CL_GL_EXT_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -/* - * cl_khr_gl_event extension - */ -#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D - -extern CL_API_ENTRY cl_event CL_API_CALL -clCreateEventFromGLsyncKHR(cl_context context, - cl_GLsync cl_GLsync, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; - -#ifdef __cplusplus -} -#endif - -#endif /* __OPENCL_CL_GL_EXT_H */ diff --git a/src/CL/cl_half.h b/src/CL/cl_half.h deleted file mode 100644 index f748d9e..0000000 --- a/src/CL/cl_half.h +++ /dev/null @@ -1,440 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2019-2020 The Khronos Group Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - ******************************************************************************/ - -/** - * This is a header-only utility library that provides OpenCL host code with - * routines for converting to/from cl_half values. - * - * Example usage: - * - * #include - * ... - * cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE); - * cl_float f = cl_half_to_float(h); - */ - -#ifndef OPENCL_CL_HALF_H -#define OPENCL_CL_HALF_H - -#include - -#include - -#ifdef __cplusplus -extern "C" { -#endif - - -/** - * Rounding mode used when converting to cl_half. 
- */ -typedef enum -{ - CL_HALF_RTE, // round to nearest even - CL_HALF_RTZ, // round towards zero - CL_HALF_RTP, // round towards positive infinity - CL_HALF_RTN, // round towards negative infinity -} cl_half_rounding_mode; - - -/* Private utility macros. */ -#define CL_HALF_EXP_MASK 0x7C00 -#define CL_HALF_MAX_FINITE_MAG 0x7BFF - - -/* - * Utility to deal with values that overflow when converting to half precision. - */ -static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode, - uint16_t sign) -{ - if (rounding_mode == CL_HALF_RTZ) - { - // Round overflow towards zero -> largest finite number (preserving sign) - return (sign << 15) | CL_HALF_MAX_FINITE_MAG; - } - else if (rounding_mode == CL_HALF_RTP && sign) - { - // Round negative overflow towards positive infinity -> most negative finite number - return (1 << 15) | CL_HALF_MAX_FINITE_MAG; - } - else if (rounding_mode == CL_HALF_RTN && !sign) - { - // Round positive overflow towards negative infinity -> largest finite number - return CL_HALF_MAX_FINITE_MAG; - } - - // Overflow to infinity - return (sign << 15) | CL_HALF_EXP_MASK; -} - -/* - * Utility to deal with values that underflow when converting to half precision. - */ -static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode, - uint16_t sign) -{ - if (rounding_mode == CL_HALF_RTP && !sign) - { - // Round underflow towards positive infinity -> smallest positive value - return (sign << 15) | 1; - } - else if (rounding_mode == CL_HALF_RTN && sign) - { - // Round underflow towards negative infinity -> largest negative value - return (sign << 15) | 1; - } - - // Flush to zero - return (sign << 15); -} - - -/** - * Convert a cl_float to a cl_half. 
- */ -static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode) -{ - // Type-punning to get direct access to underlying bits - union - { - cl_float f; - uint32_t i; - } f32; - f32.f = f; - - // Extract sign bit - uint16_t sign = f32.i >> 31; - - // Extract FP32 exponent and mantissa - uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF; - uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1); - - // Remove FP32 exponent bias - int32_t exp = f_exp - CL_FLT_MAX_EXP + 1; - - // Add FP16 exponent bias - uint16_t h_exp = exp + CL_HALF_MAX_EXP - 1; - - // Position of the bit that will become the FP16 mantissa LSB - uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG; - - // Check for NaN / infinity - if (f_exp == 0xFF) - { - if (f_mant) - { - // NaN -> propagate mantissa and silence it - uint16_t h_mant = f_mant >> lsb_pos; - h_mant |= 0x200; - return (sign << 15) | CL_HALF_EXP_MASK | h_mant; - } - else - { - // Infinity -> zero mantissa - return (sign << 15) | CL_HALF_EXP_MASK; - } - } - - // Check for zero - if (!f_exp && !f_mant) - { - return (sign << 15); - } - - // Check for overflow - if (exp >= CL_HALF_MAX_EXP) - { - return cl_half_handle_overflow(rounding_mode, sign); - } - - // Check for underflow - if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1)) - { - return cl_half_handle_underflow(rounding_mode, sign); - } - - // Check for value that will become denormal - if (exp < -14) - { - // Denormal -> include the implicit 1 from the FP32 mantissa - h_exp = 0; - f_mant |= 1 << (CL_FLT_MANT_DIG - 1); - - // Mantissa shift amount depends on exponent - lsb_pos = -exp + (CL_FLT_MANT_DIG - 25); - } - - // Generate FP16 mantissa by shifting FP32 mantissa - uint16_t h_mant = f_mant >> lsb_pos; - - // Check whether we need to round - uint32_t halfway = 1 << (lsb_pos - 1); - uint32_t mask = (halfway << 1) - 1; - switch (rounding_mode) - { - case CL_HALF_RTE: - if ((f_mant & mask) > halfway) - { - // More than halfway -> round up 
- h_mant += 1; - } - else if ((f_mant & mask) == halfway) - { - // Exactly halfway -> round to nearest even - if (h_mant & 0x1) - h_mant += 1; - } - break; - case CL_HALF_RTZ: - // Mantissa has already been truncated -> do nothing - break; - case CL_HALF_RTP: - if ((f_mant & mask) && !sign) - { - // Round positive numbers up - h_mant += 1; - } - break; - case CL_HALF_RTN: - if ((f_mant & mask) && sign) - { - // Round negative numbers down - h_mant += 1; - } - break; - } - - // Check for mantissa overflow - if (h_mant & 0x400) - { - h_exp += 1; - h_mant = 0; - } - - return (sign << 15) | (h_exp << 10) | h_mant; -} - - -/** - * Convert a cl_double to a cl_half. - */ -static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode) -{ - // Type-punning to get direct access to underlying bits - union - { - cl_double d; - uint64_t i; - } f64; - f64.d = d; - - // Extract sign bit - uint16_t sign = f64.i >> 63; - - // Extract FP64 exponent and mantissa - uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF; - uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1); - - // Remove FP64 exponent bias - int64_t exp = d_exp - CL_DBL_MAX_EXP + 1; - - // Add FP16 exponent bias - uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1); - - // Position of the bit that will become the FP16 mantissa LSB - uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG; - - // Check for NaN / infinity - if (d_exp == 0x7FF) - { - if (d_mant) - { - // NaN -> propagate mantissa and silence it - uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos); - h_mant |= 0x200; - return (sign << 15) | CL_HALF_EXP_MASK | h_mant; - } - else - { - // Infinity -> zero mantissa - return (sign << 15) | CL_HALF_EXP_MASK; - } - } - - // Check for zero - if (!d_exp && !d_mant) - { - return (sign << 15); - } - - // Check for overflow - if (exp >= CL_HALF_MAX_EXP) - { - return cl_half_handle_overflow(rounding_mode, sign); - } - - // Check for underflow - if (exp < 
(CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1)) - { - return cl_half_handle_underflow(rounding_mode, sign); - } - - // Check for value that will become denormal - if (exp < -14) - { - // Include the implicit 1 from the FP64 mantissa - h_exp = 0; - d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1); - - // Mantissa shift amount depends on exponent - lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25)); - } - - // Generate FP16 mantissa by shifting FP64 mantissa - uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos); - - // Check whether we need to round - uint64_t halfway = (uint64_t)1 << (lsb_pos - 1); - uint64_t mask = (halfway << 1) - 1; - switch (rounding_mode) - { - case CL_HALF_RTE: - if ((d_mant & mask) > halfway) - { - // More than halfway -> round up - h_mant += 1; - } - else if ((d_mant & mask) == halfway) - { - // Exactly halfway -> round to nearest even - if (h_mant & 0x1) - h_mant += 1; - } - break; - case CL_HALF_RTZ: - // Mantissa has already been truncated -> do nothing - break; - case CL_HALF_RTP: - if ((d_mant & mask) && !sign) - { - // Round positive numbers up - h_mant += 1; - } - break; - case CL_HALF_RTN: - if ((d_mant & mask) && sign) - { - // Round negative numbers down - h_mant += 1; - } - break; - } - - // Check for mantissa overflow - if (h_mant & 0x400) - { - h_exp += 1; - h_mant = 0; - } - - return (sign << 15) | (h_exp << 10) | h_mant; -} - - -/** - * Convert a cl_half to a cl_float. 
- */ -static inline cl_float cl_half_to_float(cl_half h) -{ - // Type-punning to get direct access to underlying bits - union - { - cl_float f; - uint32_t i; - } f32; - - // Extract sign bit - uint16_t sign = h >> 15; - - // Extract FP16 exponent and mantissa - uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F; - uint16_t h_mant = h & 0x3FF; - - // Remove FP16 exponent bias - int32_t exp = h_exp - CL_HALF_MAX_EXP + 1; - - // Add FP32 exponent bias - uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1; - - // Check for NaN / infinity - if (h_exp == 0x1F) - { - if (h_mant) - { - // NaN -> propagate mantissa and silence it - uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG); - f_mant |= 0x400000; - f32.i = (sign << 31) | 0x7F800000 | f_mant; - return f32.f; - } - else - { - // Infinity -> zero mantissa - f32.i = (sign << 31) | 0x7F800000; - return f32.f; - } - } - - // Check for zero / denormal - if (h_exp == 0) - { - if (h_mant == 0) - { - // Zero -> zero exponent - f_exp = 0; - } - else - { - // Denormal -> normalize it - // - Shift mantissa to make most-significant 1 implicit - // - Adjust exponent accordingly - uint32_t shift = 0; - while ((h_mant & 0x400) == 0) - { - h_mant <<= 1; - shift++; - } - h_mant &= 0x3FF; - f_exp -= shift - 1; - } - } - - f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13); - return f32.f; -} - - -#undef CL_HALF_EXP_MASK -#undef CL_HALF_MAX_FINITE_MAG - - -#ifdef __cplusplus -} -#endif - - -#endif /* OPENCL_CL_HALF_H */ diff --git a/src/CL/cl_icd.h b/src/CL/cl_icd.h deleted file mode 100644 index 8c74724..0000000 --- a/src/CL/cl_icd.h +++ /dev/null @@ -1,1287 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2019-2020 The Khronos Group Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - ******************************************************************************/ - -#ifndef OPENCL_CL_ICD_H -#define OPENCL_CL_ICD_H - -#include -#include -#include -#include - -#if defined(_WIN32) -#include -#include -#include -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * This file contains pointer type definitions for each of the CL API calls as - * well as a type definition for the dispatch table used by the Khronos ICD - * loader (see cl_khr_icd extension specification for background). - */ - -/* API function pointer definitions */ - -// Platform APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPlatformIDs)( - cl_uint num_entries, cl_platform_id *platforms, - cl_uint *num_platforms) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPlatformInfo)( - cl_platform_id platform, cl_platform_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -// Device APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDs)( - cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, - cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceInfo)( - cl_device_id device, cl_device_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateSubDevices)( - cl_device_id in_device, - const 
cl_device_partition_property *partition_properties, - cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainDevice)( - cl_device_id device) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseDevice)( - cl_device_id device) CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clCreateSubDevices; -typedef void *cl_api_clRetainDevice; -typedef void *cl_api_clReleaseDevice; - -#endif - -// Context APIs -typedef CL_API_ENTRY cl_context(CL_API_CALL *cl_api_clCreateContext)( - const cl_context_properties *properties, cl_uint num_devices, - const cl_device_id *devices, - void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), - void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_context(CL_API_CALL *cl_api_clCreateContextFromType)( - const cl_context_properties *properties, cl_device_type device_type, - void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), - void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainContext)( - cl_context context) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseContext)( - cl_context context) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetContextInfo)( - cl_context context, cl_context_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -// Command Queue APIs -typedef CL_API_ENTRY cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueue)( - cl_context context, cl_device_id device, - cl_command_queue_properties properties, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_2_0 - -typedef CL_API_ENTRY -cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueueWithProperties)( - cl_context /* context */, 
cl_device_id /* device */, - const cl_queue_properties * /* properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; - -#else - -typedef void *cl_api_clCreateCommandQueueWithProperties; - -#endif - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainCommandQueue)( - cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseCommandQueue)( - cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetCommandQueueInfo)( - cl_command_queue command_queue, cl_command_queue_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -// Memory Object APIs -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateBuffer)( - cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage)( - cl_context context, cl_mem_flags flags, const cl_image_format *image_format, - const cl_image_desc *image_desc, void *host_ptr, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clCreateImage; - -#endif - -#ifdef CL_VERSION_3_0 - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateBufferWithProperties)( - cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, - size_t size, void *host_ptr, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_3_0; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImageWithProperties)( - cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, - const cl_image_format *image_format, const cl_image_desc *image_desc, - void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_3_0; - -#else - -typedef void *cl_api_clCreateBufferWithProperties; -typedef void *cl_api_clCreateImageWithProperties; - -#endif - -typedef CL_API_ENTRY 
cl_int(CL_API_CALL *cl_api_clRetainMemObject)( - cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseMemObject)( - cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetSupportedImageFormats)( - cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, - cl_uint num_entries, cl_image_format *image_formats, - cl_uint *num_image_formats) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetMemObjectInfo)( - cl_mem memobj, cl_mem_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetImageInfo)( - cl_mem image, cl_image_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_2_0 - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreatePipe)( - cl_context /* context */, cl_mem_flags /* flags */, - cl_uint /* pipe_packet_size */, cl_uint /* pipe_max_packets */, - const cl_pipe_properties * /* properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPipeInfo)( - cl_mem /* pipe */, cl_pipe_info /* param_name */, - size_t /* param_value_size */, void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clSVMAlloc)( - cl_context /* context */, cl_svm_mem_flags /* flags */, size_t /* size */, - unsigned int /* alignment */)CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY void(CL_API_CALL *cl_api_clSVMFree)( - cl_context /* context */, - void * /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0; - -#else - -typedef void *cl_api_clCreatePipe; -typedef void *cl_api_clGetPipeInfo; -typedef void *cl_api_clSVMAlloc; -typedef void *cl_api_clSVMFree; - -#endif - -// Sampler APIs 
-typedef CL_API_ENTRY cl_sampler(CL_API_CALL *cl_api_clCreateSampler)( - cl_context context, cl_bool normalized_coords, - cl_addressing_mode addressing_mode, cl_filter_mode filter_mode, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainSampler)( - cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseSampler)( - cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetSamplerInfo)( - cl_sampler sampler, cl_sampler_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_2_0 - -typedef CL_API_ENTRY -cl_sampler(CL_API_CALL *cl_api_clCreateSamplerWithProperties)( - cl_context /* context */, - const cl_sampler_properties * /* sampler_properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; - -#else - -typedef void *cl_api_clCreateSamplerWithProperties; - -#endif - -// Program Object APIs -typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithSource)( - cl_context context, cl_uint count, const char **strings, - const size_t *lengths, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithBinary)( - cl_context context, cl_uint num_devices, const cl_device_id *device_list, - const size_t *lengths, const unsigned char **binaries, - cl_int *binary_status, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY -cl_program(CL_API_CALL *cl_api_clCreateProgramWithBuiltInKernels)( - cl_context context, cl_uint num_devices, const cl_device_id *device_list, - const char *kernel_names, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clCreateProgramWithBuiltInKernels; - -#endif - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainProgram)( - cl_program program) 
CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseProgram)( - cl_program program) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clBuildProgram)( - cl_program program, cl_uint num_devices, const cl_device_id *device_list, - const char *options, - void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), - void *user_data) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCompileProgram)( - cl_program program, cl_uint num_devices, const cl_device_id *device_list, - const char *options, cl_uint num_input_headers, - const cl_program *input_headers, const char **header_include_names, - void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), - void *user_data) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clLinkProgram)( - cl_context context, cl_uint num_devices, const cl_device_id *device_list, - const char *options, cl_uint num_input_programs, - const cl_program *input_programs, - void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), - void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clCompileProgram; -typedef void *cl_api_clLinkProgram; - -#endif - -#ifdef CL_VERSION_2_2 - -typedef CL_API_ENTRY -cl_int(CL_API_CALL *cl_api_clSetProgramSpecializationConstant)( - cl_program program, cl_uint spec_id, size_t spec_size, - const void *spec_value) CL_API_SUFFIX__VERSION_2_2; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetProgramReleaseCallback)( - cl_program program, - void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), - void *user_data) CL_API_SUFFIX__VERSION_2_2; - -#else - -typedef void *cl_api_clSetProgramSpecializationConstant; -typedef void *cl_api_clSetProgramReleaseCallback; - -#endif - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clUnloadPlatformCompiler)( - 
cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clUnloadPlatformCompiler; - -#endif - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetProgramInfo)( - cl_program program, cl_program_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetProgramBuildInfo)( - cl_program program, cl_device_id device, cl_program_build_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -// Kernel Object APIs -typedef CL_API_ENTRY cl_kernel(CL_API_CALL *cl_api_clCreateKernel)( - cl_program program, const char *kernel_name, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateKernelsInProgram)( - cl_program program, cl_uint num_kernels, cl_kernel *kernels, - cl_uint *num_kernels_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainKernel)( - cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseKernel)( - cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelArg)( - cl_kernel kernel, cl_uint arg_index, size_t arg_size, - const void *arg_value) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelInfo)( - cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelArgInfo)( - cl_kernel kernel, cl_uint arg_indx, cl_kernel_arg_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clGetKernelArgInfo; - -#endif - -typedef CL_API_ENTRY 
cl_int(CL_API_CALL *cl_api_clGetKernelWorkGroupInfo)( - cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_2_0 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelArgSVMPointer)( - cl_kernel /* kernel */, cl_uint /* arg_index */, - const void * /* arg_value */) CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelExecInfo)( - cl_kernel /* kernel */, cl_kernel_exec_info /* param_name */, - size_t /* param_value_size */, - const void * /* param_value */) CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfoKHR)( - cl_kernel /* in_kernel */, cl_device_id /*in_device*/, - cl_kernel_sub_group_info /* param_name */, size_t /*input_value_size*/, - const void * /*input_value*/, size_t /*param_value_size*/, - void * /*param_value*/, - size_t * /*param_value_size_ret*/) CL_EXT_SUFFIX__VERSION_2_0; - -#else - -typedef void *cl_api_clSetKernelArgSVMPointer; -typedef void *cl_api_clSetKernelExecInfo; -typedef void *cl_api_clGetKernelSubGroupInfoKHR; - -#endif - -// Event Object APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clWaitForEvents)( - cl_uint num_events, const cl_event *event_list) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetEventInfo)( - cl_event event, cl_event_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainEvent)(cl_event event) - CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseEvent)(cl_event event) - CL_API_SUFFIX__VERSION_1_0; - -// Profiling APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetEventProfilingInfo)( - cl_event event, cl_profiling_info param_name, size_t param_value_size, - void 
*param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -// Flush and Finish APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clFlush)( - cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clFinish)( - cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; - -// Enqueued Commands APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBuffer)( - cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, - size_t offset, size_t cb, void *ptr, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_1 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBufferRect)( - cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, - const size_t *buffer_origin, const size_t *host_origin, - const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, - size_t host_row_pitch, size_t host_slice_pitch, void *ptr, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_1; - -#else - -typedef void *cl_api_clEnqueueReadBufferRect; - -#endif - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBuffer)( - cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, - size_t offset, size_t cb, const void *ptr, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_1 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBufferRect)( - cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, - const size_t *buffer_origin, const size_t *host_origin, - const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, - size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) 
CL_API_SUFFIX__VERSION_1_1; - -#else - -typedef void *cl_api_clEnqueueWriteBufferRect; - -#endif - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueFillBuffer)( - cl_command_queue command_queue, cl_mem buffer, const void *pattern, - size_t pattern_size, size_t offset, size_t cb, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clEnqueueFillBuffer; - -#endif - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBuffer)( - cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, - size_t src_offset, size_t dst_offset, size_t cb, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_1 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferRect)( - cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, - const size_t *src_origin, const size_t *dst_origin, const size_t *region, - size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, - size_t dst_slice_pitch, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_1; - -#else - -typedef void *cl_api_clEnqueueCopyBufferRect; - -#endif - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadImage)( - cl_command_queue command_queue, cl_mem image, cl_bool blocking_read, - const size_t *origin, const size_t *region, size_t row_pitch, - size_t slice_pitch, void *ptr, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteImage)( - cl_command_queue command_queue, cl_mem image, cl_bool blocking_write, - const size_t *origin, const size_t *region, size_t input_row_pitch, - size_t input_slice_pitch, const void *ptr, cl_uint num_events_in_wait_list, - const cl_event 
*event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueFillImage)( - cl_command_queue command_queue, cl_mem image, const void *fill_color, - const size_t origin[3], const size_t region[3], - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clEnqueueFillImage; - -#endif - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyImage)( - cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image, - const size_t *src_origin, const size_t *dst_origin, const size_t *region, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyImageToBuffer)( - cl_command_queue command_queue, cl_mem src_image, cl_mem dst_buffer, - const size_t *src_origin, const size_t *region, size_t dst_offset, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferToImage)( - cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_image, - size_t src_offset, const size_t *dst_origin, const size_t *region, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clEnqueueMapBuffer)( - cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, - cl_map_flags map_flags, size_t offset, size_t cb, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clEnqueueMapImage)( - cl_command_queue command_queue, cl_mem image, cl_bool blocking_map, - cl_map_flags map_flags, const size_t *origin, const size_t 
*region, - size_t *image_row_pitch, size_t *image_slice_pitch, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueUnmapMemObject)( - cl_command_queue command_queue, cl_mem memobj, void *mapped_ptr, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMigrateMemObjects)( - cl_command_queue command_queue, cl_uint num_mem_objects, - const cl_mem *mem_objects, cl_mem_migration_flags flags, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clEnqueueMigrateMemObjects; - -#endif - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueNDRangeKernel)( - cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, - const size_t *global_work_offset, const size_t *global_work_size, - const size_t *local_work_size, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueTask)( - cl_command_queue command_queue, cl_kernel kernel, - cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueNativeKernel)( - cl_command_queue command_queue, void(CL_CALLBACK *user_func)(void *), - void *args, size_t cb_args, cl_uint num_mem_objects, const cl_mem *mem_list, - const void **args_mem_loc, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -#ifdef CL_VERSION_1_2 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMarkerWithWaitList)( - cl_command_queue command_queue, cl_uint 
num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueBarrierWithWaitList)( - cl_command_queue command_queue, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY void *( - CL_API_CALL *cl_api_clGetExtensionFunctionAddressForPlatform)( - cl_platform_id platform, - const char *function_name)CL_API_SUFFIX__VERSION_1_2; - -#else - -typedef void *cl_api_clEnqueueMarkerWithWaitList; -typedef void *cl_api_clEnqueueBarrierWithWaitList; -typedef void *cl_api_clGetExtensionFunctionAddressForPlatform; - -#endif - -// Shared Virtual Memory APIs - -#ifdef CL_VERSION_2_0 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMFree)( - cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */, - void ** /* svm_pointers */, - void(CL_CALLBACK *pfn_free_func)(cl_command_queue /* queue */, - cl_uint /* num_svm_pointers */, - void ** /* svm_pointers[] */, - void * /* user_data */), - void * /* user_data */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemcpy)( - cl_command_queue /* command_queue */, cl_bool /* blocking_copy */, - void * /* dst_ptr */, const void * /* src_ptr */, size_t /* size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemFill)( - cl_command_queue /* command_queue */, void * /* svm_ptr */, - const void * /* pattern */, size_t /* pattern_size */, size_t /* size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL 
*cl_api_clEnqueueSVMMap)( - cl_command_queue /* command_queue */, cl_bool /* blocking_map */, - cl_map_flags /* map_flags */, void * /* svm_ptr */, size_t /* size */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMUnmap)( - cl_command_queue /* command_queue */, void * /* svm_ptr */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; - -#else - -typedef void *cl_api_clEnqueueSVMFree; -typedef void *cl_api_clEnqueueSVMMemcpy; -typedef void *cl_api_clEnqueueSVMMemFill; -typedef void *cl_api_clEnqueueSVMMap; -typedef void *cl_api_clEnqueueSVMUnmap; - -#endif - -// Deprecated APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetCommandQueueProperty)( - cl_command_queue command_queue, cl_command_queue_properties properties, - cl_bool enable, cl_command_queue_properties *old_properties) - CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage2D)( - cl_context context, cl_mem_flags flags, const cl_image_format *image_format, - size_t image_width, size_t image_height, size_t image_row_pitch, - void *host_ptr, cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage3D)( - cl_context context, cl_mem_flags flags, const cl_image_format *image_format, - size_t image_width, size_t image_height, size_t image_depth, - size_t image_row_pitch, size_t image_slice_pitch, void *host_ptr, - cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clUnloadCompiler)(void) - CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMarker)( - cl_command_queue command_queue, - cl_event *event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -typedef 
CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWaitForEvents)( - cl_command_queue command_queue, cl_uint num_events, - const cl_event *event_list) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueBarrier)( - cl_command_queue command_queue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clGetExtensionFunctionAddress)( - const char *function_name)CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; - -// GL and other APIs -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLBuffer)( - cl_context context, cl_mem_flags flags, cl_GLuint bufobj, - int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture)( - cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, - cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture2D)( - cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, - cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture3D)( - cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, - cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLRenderbuffer)( - cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLObjectInfo)( - cl_mem memobj, cl_gl_object_type *gl_object_type, - cl_GLuint *gl_object_name) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLTextureInfo)( - cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY 
cl_int(CL_API_CALL *cl_api_clEnqueueAcquireGLObjects)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseGLObjects)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -/* cl_khr_gl_sharing */ -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLContextInfoKHR)( - const cl_context_properties *properties, cl_gl_context_info param_name, - size_t param_value_size, void *param_value, size_t *param_value_size_ret); - -/* cl_khr_gl_event */ -typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateEventFromGLsyncKHR)( - cl_context context, cl_GLsync sync, cl_int *errcode_ret); - -#if defined(_WIN32) - -/* cl_khr_d3d10_sharing */ - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D10KHR)( - cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, - void *d3d_object, cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, cl_device_id *devices, - cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D10Buffer *resource, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture2DKHR)( - cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, - UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture3DKHR)( - cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, - UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY -cl_int(CL_API_CALL 
*cl_api_clEnqueueAcquireD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY -cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_0; - -extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D10KHR( - cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, - void *d3d_object, cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices); - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateFromD3D10BufferKHR(cl_context context, cl_mem_flags flags, - ID3D10Buffer *resource, cl_int *errcode_ret); - -extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture2DKHR( - cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, - UINT subresource, cl_int *errcode_ret); - -extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D10Texture3DKHR( - cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, - UINT subresource, cl_int *errcode_ret); - -extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D10ObjectsKHR( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - -extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - -/* cl_khr_d3d11_sharing */ -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D11KHR)( - cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, - 
void *d3d_object, cl_d3d11_device_set_khr d3d_device_set, - cl_uint num_entries, cl_device_id *devices, - cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D11Buffer *resource, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture2DKHR)( - cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, - UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture3DKHR)( - cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, - UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY -cl_int(CL_API_CALL *cl_api_clEnqueueAcquireD3D11ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY -cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D11ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - -/* cl_khr_dx9_media_sharing */ -typedef CL_API_ENTRY -cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR)( - cl_platform_id platform, cl_uint num_media_adapters, - cl_dx9_media_adapter_type_khr *media_adapters_type, void *media_adapters, - cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, - cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromDX9MediaSurfaceKHR)( - cl_context context, cl_mem_flags flags, - cl_dx9_media_adapter_type_khr adapter_type, void *surface_info, - cl_uint plane, cl_int *errcode_ret) 
CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY -cl_int(CL_API_CALL *cl_api_clEnqueueAcquireDX9MediaSurfacesKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY -cl_int(CL_API_CALL *cl_api_clEnqueueReleaseDX9MediaSurfacesKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) CL_API_SUFFIX__VERSION_1_2; - -/* cl_khr_d3d11_sharing */ -extern CL_API_ENTRY cl_int CL_API_CALL clGetDeviceIDsFromD3D11KHR( - cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, - void *d3d_object, cl_d3d11_device_set_khr d3d_device_set, - cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices); - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateFromD3D11BufferKHR(cl_context context, cl_mem_flags flags, - ID3D11Buffer *resource, cl_int *errcode_ret); - -extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture2DKHR( - cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, - UINT subresource, cl_int *errcode_ret); - -extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromD3D11Texture3DKHR( - cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, - UINT subresource, cl_int *errcode_ret); - -extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireD3D11ObjectsKHR( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - -extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - -/* cl_khr_dx9_media_sharing */ -extern CL_API_ENTRY cl_int CL_API_CALL 
clGetDeviceIDsFromDX9MediaAdapterKHR( - cl_platform_id platform, cl_uint num_media_adapters, - cl_dx9_media_adapter_type_khr *media_adapter_type, void *media_adapters, - cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, - cl_device_id *devices, cl_uint *num_devices); - -extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromDX9MediaSurfaceKHR( - cl_context context, cl_mem_flags flags, - cl_dx9_media_adapter_type_khr adapter_type, void *surface_info, - cl_uint plane, cl_int *errcode_ret); - -extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireDX9MediaSurfacesKHR( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - -extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - -#else - -/* cl_khr_d3d10_sharing */ -typedef void *cl_api_clGetDeviceIDsFromD3D10KHR; -typedef void *cl_api_clCreateFromD3D10BufferKHR; -typedef void *cl_api_clCreateFromD3D10Texture2DKHR; -typedef void *cl_api_clCreateFromD3D10Texture3DKHR; -typedef void *cl_api_clEnqueueAcquireD3D10ObjectsKHR; -typedef void *cl_api_clEnqueueReleaseD3D10ObjectsKHR; - -/* cl_khr_d3d11_sharing */ -typedef void *cl_api_clGetDeviceIDsFromD3D11KHR; -typedef void *cl_api_clCreateFromD3D11BufferKHR; -typedef void *cl_api_clCreateFromD3D11Texture2DKHR; -typedef void *cl_api_clCreateFromD3D11Texture3DKHR; -typedef void *cl_api_clEnqueueAcquireD3D11ObjectsKHR; -typedef void *cl_api_clEnqueueReleaseD3D11ObjectsKHR; - -/* cl_khr_dx9_media_sharing */ -typedef void *cl_api_clCreateFromDX9MediaSurfaceKHR; -typedef void *cl_api_clEnqueueAcquireDX9MediaSurfacesKHR; -typedef void *cl_api_clEnqueueReleaseDX9MediaSurfacesKHR; -typedef void *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR; - -#endif - -/* OpenCL 1.1 
*/ - -#ifdef CL_VERSION_1_1 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetEventCallback)( - cl_event /* event */, cl_int /* command_exec_callback_type */, - void(CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), - void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateSubBuffer)( - cl_mem /* buffer */, cl_mem_flags /* flags */, - cl_buffer_create_type /* buffer_create_type */, - const void * /* buffer_create_info */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY -cl_int(CL_API_CALL *cl_api_clSetMemObjectDestructorCallback)( - cl_mem /* memobj */, - void(CL_CALLBACK * /*pfn_notify*/)(cl_mem /* memobj */, - void * /*user_data*/), - void * /*user_data */) CL_API_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateUserEvent)( - cl_context /* context */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetUserEventStatus)( - cl_event /* event */, - cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; - -#else - -typedef void *cl_api_clSetEventCallback; -typedef void *cl_api_clCreateSubBuffer; -typedef void *cl_api_clSetMemObjectDestructorCallback; -typedef void *cl_api_clCreateUserEvent; -typedef void *cl_api_clSetUserEventStatus; - -#endif - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateSubDevicesEXT)( - cl_device_id in_device, - const cl_device_partition_property_ext *partition_properties, - cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainDeviceEXT)( - cl_device_id device) CL_API_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseDeviceEXT)( - cl_device_id device) CL_API_SUFFIX__VERSION_1_0; - -/* cl_khr_egl_image */ -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromEGLImageKHR)( - cl_context context, CLeglDisplayKHR display, 
CLeglImageKHR image, - cl_mem_flags flags, const cl_egl_image_properties_khr *properties, - cl_int *errcode_ret); - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueAcquireEGLObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseEGLObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem *mem_objects, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, cl_event *event); - -/* cl_khr_egl_event */ -typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateEventFromEGLSyncKHR)( - cl_context context, CLeglSyncKHR sync, CLeglDisplayKHR display, - cl_int *errcode_ret); - -#ifdef CL_VERSION_2_1 - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetDefaultDeviceCommandQueue)( - cl_context context, cl_device_id device, - cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1; - -typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithIL)( - cl_context context, const void *il, size_t length, - cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfo)( - cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info param_name, - size_t input_value_size, const void *input_value, size_t param_value_size, - void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_2_1; - -typedef CL_API_ENTRY cl_kernel(CL_API_CALL *cl_api_clCloneKernel)( - cl_kernel source_kernel, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMigrateMem)( - cl_command_queue command_queue, cl_uint num_svm_pointers, - const void **svm_pointers, const size_t *sizes, - cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, - const cl_event *event_wait_list, - cl_event *event) 
CL_API_SUFFIX__VERSION_2_1; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceAndHostTimer)( - cl_device_id device, cl_ulong *device_timestamp, - cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; - -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetHostTimer)( - cl_device_id device, cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; - -#else - -typedef void *cl_api_clSetDefaultDeviceCommandQueue; -typedef void *cl_api_clCreateProgramWithIL; -typedef void *cl_api_clGetKernelSubGroupInfo; -typedef void *cl_api_clCloneKernel; -typedef void *cl_api_clEnqueueSVMMigrateMem; -typedef void *cl_api_clGetDeviceAndHostTimer; -typedef void *cl_api_clGetHostTimer; - -#endif - -/* Vendor dispatch table struture */ - -typedef struct _cl_icd_dispatch { - /* OpenCL 1.0 */ - cl_api_clGetPlatformIDs clGetPlatformIDs; - cl_api_clGetPlatformInfo clGetPlatformInfo; - cl_api_clGetDeviceIDs clGetDeviceIDs; - cl_api_clGetDeviceInfo clGetDeviceInfo; - cl_api_clCreateContext clCreateContext; - cl_api_clCreateContextFromType clCreateContextFromType; - cl_api_clRetainContext clRetainContext; - cl_api_clReleaseContext clReleaseContext; - cl_api_clGetContextInfo clGetContextInfo; - cl_api_clCreateCommandQueue clCreateCommandQueue; - cl_api_clRetainCommandQueue clRetainCommandQueue; - cl_api_clReleaseCommandQueue clReleaseCommandQueue; - cl_api_clGetCommandQueueInfo clGetCommandQueueInfo; - cl_api_clSetCommandQueueProperty clSetCommandQueueProperty; - cl_api_clCreateBuffer clCreateBuffer; - cl_api_clCreateImage2D clCreateImage2D; - cl_api_clCreateImage3D clCreateImage3D; - cl_api_clRetainMemObject clRetainMemObject; - cl_api_clReleaseMemObject clReleaseMemObject; - cl_api_clGetSupportedImageFormats clGetSupportedImageFormats; - cl_api_clGetMemObjectInfo clGetMemObjectInfo; - cl_api_clGetImageInfo clGetImageInfo; - cl_api_clCreateSampler clCreateSampler; - cl_api_clRetainSampler clRetainSampler; - cl_api_clReleaseSampler clReleaseSampler; - cl_api_clGetSamplerInfo 
clGetSamplerInfo; - cl_api_clCreateProgramWithSource clCreateProgramWithSource; - cl_api_clCreateProgramWithBinary clCreateProgramWithBinary; - cl_api_clRetainProgram clRetainProgram; - cl_api_clReleaseProgram clReleaseProgram; - cl_api_clBuildProgram clBuildProgram; - cl_api_clUnloadCompiler clUnloadCompiler; - cl_api_clGetProgramInfo clGetProgramInfo; - cl_api_clGetProgramBuildInfo clGetProgramBuildInfo; - cl_api_clCreateKernel clCreateKernel; - cl_api_clCreateKernelsInProgram clCreateKernelsInProgram; - cl_api_clRetainKernel clRetainKernel; - cl_api_clReleaseKernel clReleaseKernel; - cl_api_clSetKernelArg clSetKernelArg; - cl_api_clGetKernelInfo clGetKernelInfo; - cl_api_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo; - cl_api_clWaitForEvents clWaitForEvents; - cl_api_clGetEventInfo clGetEventInfo; - cl_api_clRetainEvent clRetainEvent; - cl_api_clReleaseEvent clReleaseEvent; - cl_api_clGetEventProfilingInfo clGetEventProfilingInfo; - cl_api_clFlush clFlush; - cl_api_clFinish clFinish; - cl_api_clEnqueueReadBuffer clEnqueueReadBuffer; - cl_api_clEnqueueWriteBuffer clEnqueueWriteBuffer; - cl_api_clEnqueueCopyBuffer clEnqueueCopyBuffer; - cl_api_clEnqueueReadImage clEnqueueReadImage; - cl_api_clEnqueueWriteImage clEnqueueWriteImage; - cl_api_clEnqueueCopyImage clEnqueueCopyImage; - cl_api_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer; - cl_api_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage; - cl_api_clEnqueueMapBuffer clEnqueueMapBuffer; - cl_api_clEnqueueMapImage clEnqueueMapImage; - cl_api_clEnqueueUnmapMemObject clEnqueueUnmapMemObject; - cl_api_clEnqueueNDRangeKernel clEnqueueNDRangeKernel; - cl_api_clEnqueueTask clEnqueueTask; - cl_api_clEnqueueNativeKernel clEnqueueNativeKernel; - cl_api_clEnqueueMarker clEnqueueMarker; - cl_api_clEnqueueWaitForEvents clEnqueueWaitForEvents; - cl_api_clEnqueueBarrier clEnqueueBarrier; - cl_api_clGetExtensionFunctionAddress clGetExtensionFunctionAddress; - cl_api_clCreateFromGLBuffer clCreateFromGLBuffer; - 
cl_api_clCreateFromGLTexture2D clCreateFromGLTexture2D; - cl_api_clCreateFromGLTexture3D clCreateFromGLTexture3D; - cl_api_clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer; - cl_api_clGetGLObjectInfo clGetGLObjectInfo; - cl_api_clGetGLTextureInfo clGetGLTextureInfo; - cl_api_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects; - cl_api_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects; - cl_api_clGetGLContextInfoKHR clGetGLContextInfoKHR; - - /* cl_khr_d3d10_sharing */ - cl_api_clGetDeviceIDsFromD3D10KHR clGetDeviceIDsFromD3D10KHR; - cl_api_clCreateFromD3D10BufferKHR clCreateFromD3D10BufferKHR; - cl_api_clCreateFromD3D10Texture2DKHR clCreateFromD3D10Texture2DKHR; - cl_api_clCreateFromD3D10Texture3DKHR clCreateFromD3D10Texture3DKHR; - cl_api_clEnqueueAcquireD3D10ObjectsKHR clEnqueueAcquireD3D10ObjectsKHR; - cl_api_clEnqueueReleaseD3D10ObjectsKHR clEnqueueReleaseD3D10ObjectsKHR; - - /* OpenCL 1.1 */ - cl_api_clSetEventCallback clSetEventCallback; - cl_api_clCreateSubBuffer clCreateSubBuffer; - cl_api_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback; - cl_api_clCreateUserEvent clCreateUserEvent; - cl_api_clSetUserEventStatus clSetUserEventStatus; - cl_api_clEnqueueReadBufferRect clEnqueueReadBufferRect; - cl_api_clEnqueueWriteBufferRect clEnqueueWriteBufferRect; - cl_api_clEnqueueCopyBufferRect clEnqueueCopyBufferRect; - - /* cl_ext_device_fission */ - cl_api_clCreateSubDevicesEXT clCreateSubDevicesEXT; - cl_api_clRetainDeviceEXT clRetainDeviceEXT; - cl_api_clReleaseDeviceEXT clReleaseDeviceEXT; - - /* cl_khr_gl_event */ - cl_api_clCreateEventFromGLsyncKHR clCreateEventFromGLsyncKHR; - - /* OpenCL 1.2 */ - cl_api_clCreateSubDevices clCreateSubDevices; - cl_api_clRetainDevice clRetainDevice; - cl_api_clReleaseDevice clReleaseDevice; - cl_api_clCreateImage clCreateImage; - cl_api_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels; - cl_api_clCompileProgram clCompileProgram; - cl_api_clLinkProgram clLinkProgram; - 
cl_api_clUnloadPlatformCompiler clUnloadPlatformCompiler; - cl_api_clGetKernelArgInfo clGetKernelArgInfo; - cl_api_clEnqueueFillBuffer clEnqueueFillBuffer; - cl_api_clEnqueueFillImage clEnqueueFillImage; - cl_api_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects; - cl_api_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList; - cl_api_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList; - cl_api_clGetExtensionFunctionAddressForPlatform - clGetExtensionFunctionAddressForPlatform; - cl_api_clCreateFromGLTexture clCreateFromGLTexture; - - /* cl_khr_d3d11_sharing */ - cl_api_clGetDeviceIDsFromD3D11KHR clGetDeviceIDsFromD3D11KHR; - cl_api_clCreateFromD3D11BufferKHR clCreateFromD3D11BufferKHR; - cl_api_clCreateFromD3D11Texture2DKHR clCreateFromD3D11Texture2DKHR; - cl_api_clCreateFromD3D11Texture3DKHR clCreateFromD3D11Texture3DKHR; - cl_api_clCreateFromDX9MediaSurfaceKHR clCreateFromDX9MediaSurfaceKHR; - cl_api_clEnqueueAcquireD3D11ObjectsKHR clEnqueueAcquireD3D11ObjectsKHR; - cl_api_clEnqueueReleaseD3D11ObjectsKHR clEnqueueReleaseD3D11ObjectsKHR; - - /* cl_khr_dx9_media_sharing */ - cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR - clGetDeviceIDsFromDX9MediaAdapterKHR; - cl_api_clEnqueueAcquireDX9MediaSurfacesKHR - clEnqueueAcquireDX9MediaSurfacesKHR; - cl_api_clEnqueueReleaseDX9MediaSurfacesKHR - clEnqueueReleaseDX9MediaSurfacesKHR; - - /* cl_khr_egl_image */ - cl_api_clCreateFromEGLImageKHR clCreateFromEGLImageKHR; - cl_api_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR; - cl_api_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR; - - /* cl_khr_egl_event */ - cl_api_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR; - - /* OpenCL 2.0 */ - cl_api_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties; - cl_api_clCreatePipe clCreatePipe; - cl_api_clGetPipeInfo clGetPipeInfo; - cl_api_clSVMAlloc clSVMAlloc; - cl_api_clSVMFree clSVMFree; - cl_api_clEnqueueSVMFree clEnqueueSVMFree; - cl_api_clEnqueueSVMMemcpy 
clEnqueueSVMMemcpy; - cl_api_clEnqueueSVMMemFill clEnqueueSVMMemFill; - cl_api_clEnqueueSVMMap clEnqueueSVMMap; - cl_api_clEnqueueSVMUnmap clEnqueueSVMUnmap; - cl_api_clCreateSamplerWithProperties clCreateSamplerWithProperties; - cl_api_clSetKernelArgSVMPointer clSetKernelArgSVMPointer; - cl_api_clSetKernelExecInfo clSetKernelExecInfo; - - /* cl_khr_sub_groups */ - cl_api_clGetKernelSubGroupInfoKHR clGetKernelSubGroupInfoKHR; - - /* OpenCL 2.1 */ - cl_api_clCloneKernel clCloneKernel; - cl_api_clCreateProgramWithIL clCreateProgramWithIL; - cl_api_clEnqueueSVMMigrateMem clEnqueueSVMMigrateMem; - cl_api_clGetDeviceAndHostTimer clGetDeviceAndHostTimer; - cl_api_clGetHostTimer clGetHostTimer; - cl_api_clGetKernelSubGroupInfo clGetKernelSubGroupInfo; - cl_api_clSetDefaultDeviceCommandQueue clSetDefaultDeviceCommandQueue; - - /* OpenCL 2.2 */ - cl_api_clSetProgramReleaseCallback clSetProgramReleaseCallback; - cl_api_clSetProgramSpecializationConstant clSetProgramSpecializationConstant; - - /* OpenCL 3.0 */ - cl_api_clCreateBufferWithProperties clCreateBufferWithProperties; - cl_api_clCreateImageWithProperties clCreateImageWithProperties; - -} cl_icd_dispatch; - -#ifdef __cplusplus -} -#endif - -#endif /* #ifndef OPENCL_CL_ICD_H */ diff --git a/src/CL/cl_platform.h b/src/CL/cl_platform.h deleted file mode 100644 index 1bd7d4b..0000000 --- a/src/CL/cl_platform.h +++ /dev/null @@ -1,1384 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - ******************************************************************************/ - -#ifndef __CL_PLATFORM_H -#define __CL_PLATFORM_H - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(_WIN32) - #define CL_API_ENTRY - #define CL_API_CALL __stdcall - #define CL_CALLBACK __stdcall -#else - #define CL_API_ENTRY - #define CL_API_CALL - #define CL_CALLBACK -#endif - -/* - * Deprecation flags refer to the last version of the header in which the - * feature was not deprecated. - * - * E.g. VERSION_1_1_DEPRECATED means the feature is present in 1.1 without - * deprecation but is deprecated in versions later than 1.1. 
- */ - -#define CL_EXTENSION_WEAK_LINK -#define CL_API_SUFFIX__VERSION_1_0 -#define CL_EXT_SUFFIX__VERSION_1_0 -#define CL_API_SUFFIX__VERSION_1_1 -#define CL_EXT_SUFFIX__VERSION_1_1 -#define CL_API_SUFFIX__VERSION_1_2 -#define CL_EXT_SUFFIX__VERSION_1_2 -#define CL_API_SUFFIX__VERSION_2_0 -#define CL_EXT_SUFFIX__VERSION_2_0 -#define CL_API_SUFFIX__VERSION_2_1 -#define CL_EXT_SUFFIX__VERSION_2_1 -#define CL_API_SUFFIX__VERSION_2_2 -#define CL_EXT_SUFFIX__VERSION_2_2 -#define CL_API_SUFFIX__VERSION_3_0 -#define CL_EXT_SUFFIX__VERSION_3_0 -#define CL_API_SUFFIX__EXPERIMENTAL -#define CL_EXT_SUFFIX__EXPERIMENTAL - - -#ifdef __GNUC__ - #define CL_EXT_SUFFIX_DEPRECATED __attribute__((deprecated)) - #define CL_EXT_PREFIX_DEPRECATED -#elif defined(_WIN32) - #define CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX_DEPRECATED __declspec(deprecated) -#else - #define CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX_DEPRECATED -#endif - -#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS - #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED -#else - #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED -#endif - -#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS - #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED -#else - #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED -#endif - -#ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS - #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED -#else - #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED CL_EXT_PREFIX_DEPRECATED - #endif - -#ifdef CL_USE_DEPRECATED_OPENCL_2_0_APIS - #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED -#else - #define 
CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED -#endif - -#ifdef CL_USE_DEPRECATED_OPENCL_2_1_APIS - #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED -#else - #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED -#endif - -#ifdef CL_USE_DEPRECATED_OPENCL_2_2_APIS - #define CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_2_DEPRECATED -#else - #define CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_2_DEPRECATED CL_EXT_PREFIX_DEPRECATED -#endif - -#if (defined (_WIN32) && defined(_MSC_VER)) - -/* scalar types */ -typedef signed __int8 cl_char; -typedef unsigned __int8 cl_uchar; -typedef signed __int16 cl_short; -typedef unsigned __int16 cl_ushort; -typedef signed __int32 cl_int; -typedef unsigned __int32 cl_uint; -typedef signed __int64 cl_long; -typedef unsigned __int64 cl_ulong; - -typedef unsigned __int16 cl_half; -typedef float cl_float; -typedef double cl_double; - -/* Macro names and corresponding values defined by OpenCL */ -#define CL_CHAR_BIT 8 -#define CL_SCHAR_MAX 127 -#define CL_SCHAR_MIN (-127-1) -#define CL_CHAR_MAX CL_SCHAR_MAX -#define CL_CHAR_MIN CL_SCHAR_MIN -#define CL_UCHAR_MAX 255 -#define CL_SHRT_MAX 32767 -#define CL_SHRT_MIN (-32767-1) -#define CL_USHRT_MAX 65535 -#define CL_INT_MAX 2147483647 -#define CL_INT_MIN (-2147483647-1) -#define CL_UINT_MAX 0xffffffffU -#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) -#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) -#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) - -#define CL_FLT_DIG 6 -#define CL_FLT_MANT_DIG 24 -#define CL_FLT_MAX_10_EXP +38 -#define CL_FLT_MAX_EXP +128 -#define CL_FLT_MIN_10_EXP -37 -#define CL_FLT_MIN_EXP -125 -#define CL_FLT_RADIX 2 -#define CL_FLT_MAX 
340282346638528859811704183484516925440.0f -#define CL_FLT_MIN 1.175494350822287507969e-38f -#define CL_FLT_EPSILON 1.1920928955078125e-7f - -#define CL_HALF_DIG 3 -#define CL_HALF_MANT_DIG 11 -#define CL_HALF_MAX_10_EXP +4 -#define CL_HALF_MAX_EXP +16 -#define CL_HALF_MIN_10_EXP -4 -#define CL_HALF_MIN_EXP -13 -#define CL_HALF_RADIX 2 -#define CL_HALF_MAX 65504.0f -#define CL_HALF_MIN 6.103515625e-05f -#define CL_HALF_EPSILON 9.765625e-04f - -#define CL_DBL_DIG 15 -#define CL_DBL_MANT_DIG 53 -#define CL_DBL_MAX_10_EXP +308 -#define CL_DBL_MAX_EXP +1024 -#define CL_DBL_MIN_10_EXP -307 -#define CL_DBL_MIN_EXP -1021 -#define CL_DBL_RADIX 2 -#define CL_DBL_MAX 1.7976931348623158e+308 -#define CL_DBL_MIN 2.225073858507201383090e-308 -#define CL_DBL_EPSILON 2.220446049250313080847e-16 - -#define CL_M_E 2.7182818284590452354 -#define CL_M_LOG2E 1.4426950408889634074 -#define CL_M_LOG10E 0.43429448190325182765 -#define CL_M_LN2 0.69314718055994530942 -#define CL_M_LN10 2.30258509299404568402 -#define CL_M_PI 3.14159265358979323846 -#define CL_M_PI_2 1.57079632679489661923 -#define CL_M_PI_4 0.78539816339744830962 -#define CL_M_1_PI 0.31830988618379067154 -#define CL_M_2_PI 0.63661977236758134308 -#define CL_M_2_SQRTPI 1.12837916709551257390 -#define CL_M_SQRT2 1.41421356237309504880 -#define CL_M_SQRT1_2 0.70710678118654752440 - -#define CL_M_E_F 2.718281828f -#define CL_M_LOG2E_F 1.442695041f -#define CL_M_LOG10E_F 0.434294482f -#define CL_M_LN2_F 0.693147181f -#define CL_M_LN10_F 2.302585093f -#define CL_M_PI_F 3.141592654f -#define CL_M_PI_2_F 1.570796327f -#define CL_M_PI_4_F 0.785398163f -#define CL_M_1_PI_F 0.318309886f -#define CL_M_2_PI_F 0.636619772f -#define CL_M_2_SQRTPI_F 1.128379167f -#define CL_M_SQRT2_F 1.414213562f -#define CL_M_SQRT1_2_F 0.707106781f - -#define CL_NAN (CL_INFINITY - CL_INFINITY) -#define CL_HUGE_VALF ((cl_float) 1e50) -#define CL_HUGE_VAL ((cl_double) 1e500) -#define CL_MAXFLOAT CL_FLT_MAX -#define CL_INFINITY CL_HUGE_VALF - -#else - 
-#include - -/* scalar types */ -typedef int8_t cl_char; -typedef uint8_t cl_uchar; -typedef int16_t cl_short; -typedef uint16_t cl_ushort; -typedef int32_t cl_int; -typedef uint32_t cl_uint; -typedef int64_t cl_long; -typedef uint64_t cl_ulong; - -typedef uint16_t cl_half; -typedef float cl_float; -typedef double cl_double; - -/* Macro names and corresponding values defined by OpenCL */ -#define CL_CHAR_BIT 8 -#define CL_SCHAR_MAX 127 -#define CL_SCHAR_MIN (-127-1) -#define CL_CHAR_MAX CL_SCHAR_MAX -#define CL_CHAR_MIN CL_SCHAR_MIN -#define CL_UCHAR_MAX 255 -#define CL_SHRT_MAX 32767 -#define CL_SHRT_MIN (-32767-1) -#define CL_USHRT_MAX 65535 -#define CL_INT_MAX 2147483647 -#define CL_INT_MIN (-2147483647-1) -#define CL_UINT_MAX 0xffffffffU -#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) -#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) -#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) - -#define CL_FLT_DIG 6 -#define CL_FLT_MANT_DIG 24 -#define CL_FLT_MAX_10_EXP +38 -#define CL_FLT_MAX_EXP +128 -#define CL_FLT_MIN_10_EXP -37 -#define CL_FLT_MIN_EXP -125 -#define CL_FLT_RADIX 2 -#define CL_FLT_MAX 340282346638528859811704183484516925440.0f -#define CL_FLT_MIN 1.175494350822287507969e-38f -#define CL_FLT_EPSILON 1.1920928955078125e-7f - -#define CL_HALF_DIG 3 -#define CL_HALF_MANT_DIG 11 -#define CL_HALF_MAX_10_EXP +4 -#define CL_HALF_MAX_EXP +16 -#define CL_HALF_MIN_10_EXP -4 -#define CL_HALF_MIN_EXP -13 -#define CL_HALF_RADIX 2 -#define CL_HALF_MAX 65504.0f -#define CL_HALF_MIN 6.103515625e-05f -#define CL_HALF_EPSILON 9.765625e-04f - -#define CL_DBL_DIG 15 -#define CL_DBL_MANT_DIG 53 -#define CL_DBL_MAX_10_EXP +308 -#define CL_DBL_MAX_EXP +1024 -#define CL_DBL_MIN_10_EXP -307 -#define CL_DBL_MIN_EXP -1021 -#define CL_DBL_RADIX 2 -#define CL_DBL_MAX 
179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0 -#define CL_DBL_MIN 2.225073858507201383090e-308 -#define CL_DBL_EPSILON 2.220446049250313080847e-16 - -#define CL_M_E 2.7182818284590452354 -#define CL_M_LOG2E 1.4426950408889634074 -#define CL_M_LOG10E 0.43429448190325182765 -#define CL_M_LN2 0.69314718055994530942 -#define CL_M_LN10 2.30258509299404568402 -#define CL_M_PI 3.14159265358979323846 -#define CL_M_PI_2 1.57079632679489661923 -#define CL_M_PI_4 0.78539816339744830962 -#define CL_M_1_PI 0.31830988618379067154 -#define CL_M_2_PI 0.63661977236758134308 -#define CL_M_2_SQRTPI 1.12837916709551257390 -#define CL_M_SQRT2 1.41421356237309504880 -#define CL_M_SQRT1_2 0.70710678118654752440 - -#define CL_M_E_F 2.718281828f -#define CL_M_LOG2E_F 1.442695041f -#define CL_M_LOG10E_F 0.434294482f -#define CL_M_LN2_F 0.693147181f -#define CL_M_LN10_F 2.302585093f -#define CL_M_PI_F 3.141592654f -#define CL_M_PI_2_F 1.570796327f -#define CL_M_PI_4_F 0.785398163f -#define CL_M_1_PI_F 0.318309886f -#define CL_M_2_PI_F 0.636619772f -#define CL_M_2_SQRTPI_F 1.128379167f -#define CL_M_SQRT2_F 1.414213562f -#define CL_M_SQRT1_2_F 0.707106781f - -#if defined( __GNUC__ ) - #define CL_HUGE_VALF __builtin_huge_valf() - #define CL_HUGE_VAL __builtin_huge_val() - #define CL_NAN __builtin_nanf( "" ) -#else - #define CL_HUGE_VALF ((cl_float) 1e50) - #define CL_HUGE_VAL ((cl_double) 1e500) - float nanf( const char * ); - #define CL_NAN nanf( "" ) -#endif -#define CL_MAXFLOAT CL_FLT_MAX -#define CL_INFINITY CL_HUGE_VALF - -#endif - -#include - -/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. 
*/ -typedef unsigned int cl_GLuint; -typedef int cl_GLint; -typedef unsigned int cl_GLenum; - -/* - * Vector types - * - * Note: OpenCL requires that all types be naturally aligned. - * This means that vector types must be naturally aligned. - * For example, a vector of four floats must be aligned to - * a 16 byte boundary (calculated as 4 * the natural 4-byte - * alignment of the float). The alignment qualifiers here - * will only function properly if your compiler supports them - * and if you don't actively work to defeat them. For example, - * in order for a cl_float4 to be 16 byte aligned in a struct, - * the start of the struct must itself be 16-byte aligned. - * - * Maintaining proper alignment is the user's responsibility. - */ - -/* Define basic vector types */ -#if defined( __VEC__ ) - #include /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */ - typedef __vector unsigned char __cl_uchar16; - typedef __vector signed char __cl_char16; - typedef __vector unsigned short __cl_ushort8; - typedef __vector signed short __cl_short8; - typedef __vector unsigned int __cl_uint4; - typedef __vector signed int __cl_int4; - typedef __vector float __cl_float4; - #define __CL_UCHAR16__ 1 - #define __CL_CHAR16__ 1 - #define __CL_USHORT8__ 1 - #define __CL_SHORT8__ 1 - #define __CL_UINT4__ 1 - #define __CL_INT4__ 1 - #define __CL_FLOAT4__ 1 -#endif - -#if defined( __SSE__ ) - #if defined( __MINGW64__ ) - #include - #else - #include - #endif - #if defined( __GNUC__ ) - typedef float __cl_float4 __attribute__((vector_size(16))); - #else - typedef __m128 __cl_float4; - #endif - #define __CL_FLOAT4__ 1 -#endif - -#if defined( __SSE2__ ) - #if defined( __MINGW64__ ) - #include - #else - #include - #endif - #if defined( __GNUC__ ) - typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16))); - typedef cl_char __cl_char16 __attribute__((vector_size(16))); - typedef cl_ushort __cl_ushort8 
__attribute__((vector_size(16))); - typedef cl_short __cl_short8 __attribute__((vector_size(16))); - typedef cl_uint __cl_uint4 __attribute__((vector_size(16))); - typedef cl_int __cl_int4 __attribute__((vector_size(16))); - typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16))); - typedef cl_long __cl_long2 __attribute__((vector_size(16))); - typedef cl_double __cl_double2 __attribute__((vector_size(16))); - #else - typedef __m128i __cl_uchar16; - typedef __m128i __cl_char16; - typedef __m128i __cl_ushort8; - typedef __m128i __cl_short8; - typedef __m128i __cl_uint4; - typedef __m128i __cl_int4; - typedef __m128i __cl_ulong2; - typedef __m128i __cl_long2; - typedef __m128d __cl_double2; - #endif - #define __CL_UCHAR16__ 1 - #define __CL_CHAR16__ 1 - #define __CL_USHORT8__ 1 - #define __CL_SHORT8__ 1 - #define __CL_INT4__ 1 - #define __CL_UINT4__ 1 - #define __CL_ULONG2__ 1 - #define __CL_LONG2__ 1 - #define __CL_DOUBLE2__ 1 -#endif - -#if defined( __MMX__ ) - #include - #if defined( __GNUC__ ) - typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8))); - typedef cl_char __cl_char8 __attribute__((vector_size(8))); - typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8))); - typedef cl_short __cl_short4 __attribute__((vector_size(8))); - typedef cl_uint __cl_uint2 __attribute__((vector_size(8))); - typedef cl_int __cl_int2 __attribute__((vector_size(8))); - typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8))); - typedef cl_long __cl_long1 __attribute__((vector_size(8))); - typedef cl_float __cl_float2 __attribute__((vector_size(8))); - #else - typedef __m64 __cl_uchar8; - typedef __m64 __cl_char8; - typedef __m64 __cl_ushort4; - typedef __m64 __cl_short4; - typedef __m64 __cl_uint2; - typedef __m64 __cl_int2; - typedef __m64 __cl_ulong1; - typedef __m64 __cl_long1; - typedef __m64 __cl_float2; - #endif - #define __CL_UCHAR8__ 1 - #define __CL_CHAR8__ 1 - #define __CL_USHORT4__ 1 - #define __CL_SHORT4__ 1 - #define __CL_INT2__ 1 - #define 
__CL_UINT2__ 1 - #define __CL_ULONG1__ 1 - #define __CL_LONG1__ 1 - #define __CL_FLOAT2__ 1 -#endif - -#if defined( __AVX__ ) - #if defined( __MINGW64__ ) - #include - #else - #include - #endif - #if defined( __GNUC__ ) - typedef cl_float __cl_float8 __attribute__((vector_size(32))); - typedef cl_double __cl_double4 __attribute__((vector_size(32))); - #else - typedef __m256 __cl_float8; - typedef __m256d __cl_double4; - #endif - #define __CL_FLOAT8__ 1 - #define __CL_DOUBLE4__ 1 -#endif - -/* Define capabilities for anonymous struct members. */ -#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L -#define __CL_HAS_ANON_STRUCT__ 1 -#define __CL_ANON_STRUCT__ -#elif defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) -#define __CL_HAS_ANON_STRUCT__ 1 -#define __CL_ANON_STRUCT__ __extension__ -#elif defined( _WIN32) && defined(_MSC_VER) - #if _MSC_VER >= 1500 - /* Microsoft Developer Studio 2008 supports anonymous structs, but - * complains by default. */ - #define __CL_HAS_ANON_STRUCT__ 1 - #define __CL_ANON_STRUCT__ - /* Disable warning C4201: nonstandard extension used : nameless - * struct/union */ - #pragma warning( push ) - #pragma warning( disable : 4201 ) - #endif -#else -#define __CL_HAS_ANON_STRUCT__ 0 -#define __CL_ANON_STRUCT__ -#endif - -/* Define alignment keys */ -#if defined( __GNUC__ ) - #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) -#elif defined( _WIN32) && (_MSC_VER) - /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */ - /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */ - /* #include */ - /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */ - #define CL_ALIGNED(_x) -#else - #warning Need to implement some method to align data here - #define CL_ALIGNED(_x) -#endif - -/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */ -#if __CL_HAS_ANON_STRUCT__ - /* .xyzw and .s0123...{f|F} are supported */ - #define 
CL_HAS_NAMED_VECTOR_FIELDS 1 - /* .hi and .lo are supported */ - #define CL_HAS_HI_LO_VECTOR_FIELDS 1 -#endif - -/* Define cl_vector types */ - -/* ---- cl_charn ---- */ -typedef union -{ - cl_char CL_ALIGNED(2) s[2]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_char x, y; }; - __CL_ANON_STRUCT__ struct{ cl_char s0, s1; }; - __CL_ANON_STRUCT__ struct{ cl_char lo, hi; }; -#endif -#if defined( __CL_CHAR2__) - __cl_char2 v2; -#endif -}cl_char2; - -typedef union -{ - cl_char CL_ALIGNED(4) s[4]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3; }; - __CL_ANON_STRUCT__ struct{ cl_char2 lo, hi; }; -#endif -#if defined( __CL_CHAR2__) - __cl_char2 v2[2]; -#endif -#if defined( __CL_CHAR4__) - __cl_char4 v4; -#endif -}cl_char4; - -/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */ -typedef cl_char4 cl_char3; - -typedef union -{ - cl_char CL_ALIGNED(8) s[8]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; }; - __CL_ANON_STRUCT__ struct{ cl_char4 lo, hi; }; -#endif -#if defined( __CL_CHAR2__) - __cl_char2 v2[4]; -#endif -#if defined( __CL_CHAR4__) - __cl_char4 v4[2]; -#endif -#if defined( __CL_CHAR8__ ) - __cl_char8 v8; -#endif -}cl_char8; - -typedef union -{ - cl_char CL_ALIGNED(16) s[16]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; - __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; - __CL_ANON_STRUCT__ struct{ cl_char8 lo, hi; }; -#endif -#if defined( __CL_CHAR2__) - __cl_char2 v2[8]; -#endif -#if defined( __CL_CHAR4__) - __cl_char4 v4[4]; -#endif -#if defined( __CL_CHAR8__ ) - __cl_char8 v8[2]; -#endif -#if defined( __CL_CHAR16__ ) - __cl_char16 v16; -#endif 
-}cl_char16; - - -/* ---- cl_ucharn ---- */ -typedef union -{ - cl_uchar CL_ALIGNED(2) s[2]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_uchar x, y; }; - __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1; }; - __CL_ANON_STRUCT__ struct{ cl_uchar lo, hi; }; -#endif -#if defined( __cl_uchar2__) - __cl_uchar2 v2; -#endif -}cl_uchar2; - -typedef union -{ - cl_uchar CL_ALIGNED(4) s[4]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3; }; - __CL_ANON_STRUCT__ struct{ cl_uchar2 lo, hi; }; -#endif -#if defined( __CL_UCHAR2__) - __cl_uchar2 v2[2]; -#endif -#if defined( __CL_UCHAR4__) - __cl_uchar4 v4; -#endif -}cl_uchar4; - -/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */ -typedef cl_uchar4 cl_uchar3; - -typedef union -{ - cl_uchar CL_ALIGNED(8) s[8]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; }; - __CL_ANON_STRUCT__ struct{ cl_uchar4 lo, hi; }; -#endif -#if defined( __CL_UCHAR2__) - __cl_uchar2 v2[4]; -#endif -#if defined( __CL_UCHAR4__) - __cl_uchar4 v4[2]; -#endif -#if defined( __CL_UCHAR8__ ) - __cl_uchar8 v8; -#endif -}cl_uchar8; - -typedef union -{ - cl_uchar CL_ALIGNED(16) s[16]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; - __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; - __CL_ANON_STRUCT__ struct{ cl_uchar8 lo, hi; }; -#endif -#if defined( __CL_UCHAR2__) - __cl_uchar2 v2[8]; -#endif -#if defined( __CL_UCHAR4__) - __cl_uchar4 v4[4]; -#endif -#if defined( __CL_UCHAR8__ ) - __cl_uchar8 v8[2]; -#endif -#if defined( __CL_UCHAR16__ ) - __cl_uchar16 v16; -#endif -}cl_uchar16; - - -/* ---- cl_shortn ---- */ -typedef union -{ - cl_short CL_ALIGNED(4) 
s[2]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_short x, y; }; - __CL_ANON_STRUCT__ struct{ cl_short s0, s1; }; - __CL_ANON_STRUCT__ struct{ cl_short lo, hi; }; -#endif -#if defined( __CL_SHORT2__) - __cl_short2 v2; -#endif -}cl_short2; - -typedef union -{ - cl_short CL_ALIGNED(8) s[4]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3; }; - __CL_ANON_STRUCT__ struct{ cl_short2 lo, hi; }; -#endif -#if defined( __CL_SHORT2__) - __cl_short2 v2[2]; -#endif -#if defined( __CL_SHORT4__) - __cl_short4 v4; -#endif -}cl_short4; - -/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */ -typedef cl_short4 cl_short3; - -typedef union -{ - cl_short CL_ALIGNED(16) s[8]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; }; - __CL_ANON_STRUCT__ struct{ cl_short4 lo, hi; }; -#endif -#if defined( __CL_SHORT2__) - __cl_short2 v2[4]; -#endif -#if defined( __CL_SHORT4__) - __cl_short4 v4[2]; -#endif -#if defined( __CL_SHORT8__ ) - __cl_short8 v8; -#endif -}cl_short8; - -typedef union -{ - cl_short CL_ALIGNED(32) s[16]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; - __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; - __CL_ANON_STRUCT__ struct{ cl_short8 lo, hi; }; -#endif -#if defined( __CL_SHORT2__) - __cl_short2 v2[8]; -#endif -#if defined( __CL_SHORT4__) - __cl_short4 v4[4]; -#endif -#if defined( __CL_SHORT8__ ) - __cl_short8 v8[2]; -#endif -#if defined( __CL_SHORT16__ ) - __cl_short16 v16; -#endif -}cl_short16; - - -/* ---- cl_ushortn ---- */ -typedef union -{ - cl_ushort CL_ALIGNED(4) s[2]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_ushort x, y; }; - 
__CL_ANON_STRUCT__ struct{ cl_ushort s0, s1; }; - __CL_ANON_STRUCT__ struct{ cl_ushort lo, hi; }; -#endif -#if defined( __CL_USHORT2__) - __cl_ushort2 v2; -#endif -}cl_ushort2; - -typedef union -{ - cl_ushort CL_ALIGNED(8) s[4]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3; }; - __CL_ANON_STRUCT__ struct{ cl_ushort2 lo, hi; }; -#endif -#if defined( __CL_USHORT2__) - __cl_ushort2 v2[2]; -#endif -#if defined( __CL_USHORT4__) - __cl_ushort4 v4; -#endif -}cl_ushort4; - -/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */ -typedef cl_ushort4 cl_ushort3; - -typedef union -{ - cl_ushort CL_ALIGNED(16) s[8]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; }; - __CL_ANON_STRUCT__ struct{ cl_ushort4 lo, hi; }; -#endif -#if defined( __CL_USHORT2__) - __cl_ushort2 v2[4]; -#endif -#if defined( __CL_USHORT4__) - __cl_ushort4 v4[2]; -#endif -#if defined( __CL_USHORT8__ ) - __cl_ushort8 v8; -#endif -}cl_ushort8; - -typedef union -{ - cl_ushort CL_ALIGNED(32) s[16]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; - __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; - __CL_ANON_STRUCT__ struct{ cl_ushort8 lo, hi; }; -#endif -#if defined( __CL_USHORT2__) - __cl_ushort2 v2[8]; -#endif -#if defined( __CL_USHORT4__) - __cl_ushort4 v4[4]; -#endif -#if defined( __CL_USHORT8__ ) - __cl_ushort8 v8[2]; -#endif -#if defined( __CL_USHORT16__ ) - __cl_ushort16 v16; -#endif -}cl_ushort16; - - -/* ---- cl_halfn ---- */ -typedef union -{ - cl_half CL_ALIGNED(4) s[2]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_half x, y; }; - __CL_ANON_STRUCT__ struct{ cl_half s0, s1; }; - 
__CL_ANON_STRUCT__ struct{ cl_half lo, hi; }; -#endif -#if defined( __CL_HALF2__) - __cl_half2 v2; -#endif -}cl_half2; - -typedef union -{ - cl_half CL_ALIGNED(8) s[4]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3; }; - __CL_ANON_STRUCT__ struct{ cl_half2 lo, hi; }; -#endif -#if defined( __CL_HALF2__) - __cl_half2 v2[2]; -#endif -#if defined( __CL_HALF4__) - __cl_half4 v4; -#endif -}cl_half4; - -/* cl_half3 is identical in size, alignment and behavior to cl_half4. See section 6.1.5. */ -typedef cl_half4 cl_half3; - -typedef union -{ - cl_half CL_ALIGNED(16) s[8]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7; }; - __CL_ANON_STRUCT__ struct{ cl_half4 lo, hi; }; -#endif -#if defined( __CL_HALF2__) - __cl_half2 v2[4]; -#endif -#if defined( __CL_HALF4__) - __cl_half4 v4[2]; -#endif -#if defined( __CL_HALF8__ ) - __cl_half8 v8; -#endif -}cl_half8; - -typedef union -{ - cl_half CL_ALIGNED(32) s[16]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; - __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; - __CL_ANON_STRUCT__ struct{ cl_half8 lo, hi; }; -#endif -#if defined( __CL_HALF2__) - __cl_half2 v2[8]; -#endif -#if defined( __CL_HALF4__) - __cl_half4 v4[4]; -#endif -#if defined( __CL_HALF8__ ) - __cl_half8 v8[2]; -#endif -#if defined( __CL_HALF16__ ) - __cl_half16 v16; -#endif -}cl_half16; - -/* ---- cl_intn ---- */ -typedef union -{ - cl_int CL_ALIGNED(8) s[2]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_int x, y; }; - __CL_ANON_STRUCT__ struct{ cl_int s0, s1; }; - __CL_ANON_STRUCT__ struct{ cl_int lo, hi; }; -#endif -#if defined( __CL_INT2__) - __cl_int2 v2; -#endif -}cl_int2; - -typedef union -{ - 
cl_int CL_ALIGNED(16) s[4]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3; }; - __CL_ANON_STRUCT__ struct{ cl_int2 lo, hi; }; -#endif -#if defined( __CL_INT2__) - __cl_int2 v2[2]; -#endif -#if defined( __CL_INT4__) - __cl_int4 v4; -#endif -}cl_int4; - -/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */ -typedef cl_int4 cl_int3; - -typedef union -{ - cl_int CL_ALIGNED(32) s[8]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; }; - __CL_ANON_STRUCT__ struct{ cl_int4 lo, hi; }; -#endif -#if defined( __CL_INT2__) - __cl_int2 v2[4]; -#endif -#if defined( __CL_INT4__) - __cl_int4 v4[2]; -#endif -#if defined( __CL_INT8__ ) - __cl_int8 v8; -#endif -}cl_int8; - -typedef union -{ - cl_int CL_ALIGNED(64) s[16]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; - __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; - __CL_ANON_STRUCT__ struct{ cl_int8 lo, hi; }; -#endif -#if defined( __CL_INT2__) - __cl_int2 v2[8]; -#endif -#if defined( __CL_INT4__) - __cl_int4 v4[4]; -#endif -#if defined( __CL_INT8__ ) - __cl_int8 v8[2]; -#endif -#if defined( __CL_INT16__ ) - __cl_int16 v16; -#endif -}cl_int16; - - -/* ---- cl_uintn ---- */ -typedef union -{ - cl_uint CL_ALIGNED(8) s[2]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_uint x, y; }; - __CL_ANON_STRUCT__ struct{ cl_uint s0, s1; }; - __CL_ANON_STRUCT__ struct{ cl_uint lo, hi; }; -#endif -#if defined( __CL_UINT2__) - __cl_uint2 v2; -#endif -}cl_uint2; - -typedef union -{ - cl_uint CL_ALIGNED(16) s[4]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3; }; - 
__CL_ANON_STRUCT__ struct{ cl_uint2 lo, hi; }; -#endif -#if defined( __CL_UINT2__) - __cl_uint2 v2[2]; -#endif -#if defined( __CL_UINT4__) - __cl_uint4 v4; -#endif -}cl_uint4; - -/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */ -typedef cl_uint4 cl_uint3; - -typedef union -{ - cl_uint CL_ALIGNED(32) s[8]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; }; - __CL_ANON_STRUCT__ struct{ cl_uint4 lo, hi; }; -#endif -#if defined( __CL_UINT2__) - __cl_uint2 v2[4]; -#endif -#if defined( __CL_UINT4__) - __cl_uint4 v4[2]; -#endif -#if defined( __CL_UINT8__ ) - __cl_uint8 v8; -#endif -}cl_uint8; - -typedef union -{ - cl_uint CL_ALIGNED(64) s[16]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; - __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; - __CL_ANON_STRUCT__ struct{ cl_uint8 lo, hi; }; -#endif -#if defined( __CL_UINT2__) - __cl_uint2 v2[8]; -#endif -#if defined( __CL_UINT4__) - __cl_uint4 v4[4]; -#endif -#if defined( __CL_UINT8__ ) - __cl_uint8 v8[2]; -#endif -#if defined( __CL_UINT16__ ) - __cl_uint16 v16; -#endif -}cl_uint16; - -/* ---- cl_longn ---- */ -typedef union -{ - cl_long CL_ALIGNED(16) s[2]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_long x, y; }; - __CL_ANON_STRUCT__ struct{ cl_long s0, s1; }; - __CL_ANON_STRUCT__ struct{ cl_long lo, hi; }; -#endif -#if defined( __CL_LONG2__) - __cl_long2 v2; -#endif -}cl_long2; - -typedef union -{ - cl_long CL_ALIGNED(32) s[4]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3; }; - __CL_ANON_STRUCT__ struct{ cl_long2 lo, hi; }; -#endif -#if defined( __CL_LONG2__) - __cl_long2 v2[2]; -#endif -#if defined( 
__CL_LONG4__) - __cl_long4 v4; -#endif -}cl_long4; - -/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */ -typedef cl_long4 cl_long3; - -typedef union -{ - cl_long CL_ALIGNED(64) s[8]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; }; - __CL_ANON_STRUCT__ struct{ cl_long4 lo, hi; }; -#endif -#if defined( __CL_LONG2__) - __cl_long2 v2[4]; -#endif -#if defined( __CL_LONG4__) - __cl_long4 v4[2]; -#endif -#if defined( __CL_LONG8__ ) - __cl_long8 v8; -#endif -}cl_long8; - -typedef union -{ - cl_long CL_ALIGNED(128) s[16]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; - __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; - __CL_ANON_STRUCT__ struct{ cl_long8 lo, hi; }; -#endif -#if defined( __CL_LONG2__) - __cl_long2 v2[8]; -#endif -#if defined( __CL_LONG4__) - __cl_long4 v4[4]; -#endif -#if defined( __CL_LONG8__ ) - __cl_long8 v8[2]; -#endif -#if defined( __CL_LONG16__ ) - __cl_long16 v16; -#endif -}cl_long16; - - -/* ---- cl_ulongn ---- */ -typedef union -{ - cl_ulong CL_ALIGNED(16) s[2]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_ulong x, y; }; - __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1; }; - __CL_ANON_STRUCT__ struct{ cl_ulong lo, hi; }; -#endif -#if defined( __CL_ULONG2__) - __cl_ulong2 v2; -#endif -}cl_ulong2; - -typedef union -{ - cl_ulong CL_ALIGNED(32) s[4]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3; }; - __CL_ANON_STRUCT__ struct{ cl_ulong2 lo, hi; }; -#endif -#if defined( __CL_ULONG2__) - __cl_ulong2 v2[2]; -#endif -#if defined( __CL_ULONG4__) - __cl_ulong4 v4; -#endif -}cl_ulong4; - -/* cl_ulong3 is identical in size, alignment and behavior 
to cl_ulong4. See section 6.1.5. */ -typedef cl_ulong4 cl_ulong3; - -typedef union -{ - cl_ulong CL_ALIGNED(64) s[8]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; }; - __CL_ANON_STRUCT__ struct{ cl_ulong4 lo, hi; }; -#endif -#if defined( __CL_ULONG2__) - __cl_ulong2 v2[4]; -#endif -#if defined( __CL_ULONG4__) - __cl_ulong4 v4[2]; -#endif -#if defined( __CL_ULONG8__ ) - __cl_ulong8 v8; -#endif -}cl_ulong8; - -typedef union -{ - cl_ulong CL_ALIGNED(128) s[16]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; - __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; - __CL_ANON_STRUCT__ struct{ cl_ulong8 lo, hi; }; -#endif -#if defined( __CL_ULONG2__) - __cl_ulong2 v2[8]; -#endif -#if defined( __CL_ULONG4__) - __cl_ulong4 v4[4]; -#endif -#if defined( __CL_ULONG8__ ) - __cl_ulong8 v8[2]; -#endif -#if defined( __CL_ULONG16__ ) - __cl_ulong16 v16; -#endif -}cl_ulong16; - - -/* --- cl_floatn ---- */ - -typedef union -{ - cl_float CL_ALIGNED(8) s[2]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_float x, y; }; - __CL_ANON_STRUCT__ struct{ cl_float s0, s1; }; - __CL_ANON_STRUCT__ struct{ cl_float lo, hi; }; -#endif -#if defined( __CL_FLOAT2__) - __cl_float2 v2; -#endif -}cl_float2; - -typedef union -{ - cl_float CL_ALIGNED(16) s[4]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3; }; - __CL_ANON_STRUCT__ struct{ cl_float2 lo, hi; }; -#endif -#if defined( __CL_FLOAT2__) - __cl_float2 v2[2]; -#endif -#if defined( __CL_FLOAT4__) - __cl_float4 v4; -#endif -}cl_float4; - -/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. 
*/ -typedef cl_float4 cl_float3; - -typedef union -{ - cl_float CL_ALIGNED(32) s[8]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; }; - __CL_ANON_STRUCT__ struct{ cl_float4 lo, hi; }; -#endif -#if defined( __CL_FLOAT2__) - __cl_float2 v2[4]; -#endif -#if defined( __CL_FLOAT4__) - __cl_float4 v4[2]; -#endif -#if defined( __CL_FLOAT8__ ) - __cl_float8 v8; -#endif -}cl_float8; - -typedef union -{ - cl_float CL_ALIGNED(64) s[16]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; - __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; - __CL_ANON_STRUCT__ struct{ cl_float8 lo, hi; }; -#endif -#if defined( __CL_FLOAT2__) - __cl_float2 v2[8]; -#endif -#if defined( __CL_FLOAT4__) - __cl_float4 v4[4]; -#endif -#if defined( __CL_FLOAT8__ ) - __cl_float8 v8[2]; -#endif -#if defined( __CL_FLOAT16__ ) - __cl_float16 v16; -#endif -}cl_float16; - -/* --- cl_doublen ---- */ - -typedef union -{ - cl_double CL_ALIGNED(16) s[2]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_double x, y; }; - __CL_ANON_STRUCT__ struct{ cl_double s0, s1; }; - __CL_ANON_STRUCT__ struct{ cl_double lo, hi; }; -#endif -#if defined( __CL_DOUBLE2__) - __cl_double2 v2; -#endif -}cl_double2; - -typedef union -{ - cl_double CL_ALIGNED(32) s[4]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3; }; - __CL_ANON_STRUCT__ struct{ cl_double2 lo, hi; }; -#endif -#if defined( __CL_DOUBLE2__) - __cl_double2 v2[2]; -#endif -#if defined( __CL_DOUBLE4__) - __cl_double4 v4; -#endif -}cl_double4; - -/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. 
*/ -typedef cl_double4 cl_double3; - -typedef union -{ - cl_double CL_ALIGNED(64) s[8]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; - __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; }; - __CL_ANON_STRUCT__ struct{ cl_double4 lo, hi; }; -#endif -#if defined( __CL_DOUBLE2__) - __cl_double2 v2[4]; -#endif -#if defined( __CL_DOUBLE4__) - __cl_double4 v4[2]; -#endif -#if defined( __CL_DOUBLE8__ ) - __cl_double8 v8; -#endif -}cl_double8; - -typedef union -{ - cl_double CL_ALIGNED(128) s[16]; -#if __CL_HAS_ANON_STRUCT__ - __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; - __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; - __CL_ANON_STRUCT__ struct{ cl_double8 lo, hi; }; -#endif -#if defined( __CL_DOUBLE2__) - __cl_double2 v2[8]; -#endif -#if defined( __CL_DOUBLE4__) - __cl_double4 v4[4]; -#endif -#if defined( __CL_DOUBLE8__ ) - __cl_double8 v8[2]; -#endif -#if defined( __CL_DOUBLE16__ ) - __cl_double16 v16; -#endif -}cl_double16; - -/* Macro to facilitate debugging - * Usage: - * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. - * The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \" - * Each line thereafter of OpenCL C source must end with: \n\ - * The last line ends in "; - * - * Example: - * - * const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\ - * kernel void foo( int a, float * b ) \n\ - * { \n\ - * // my comment \n\ - * *b[ get_global_id(0)] = a; \n\ - * } \n\ - * "; - * - * This should correctly set up the line, (column) and file information for your source - * string so you can do source level debugging. 
- */ -#define __CL_STRINGIFY( _x ) # _x -#define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x ) -#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" - -#ifdef __cplusplus -} -#endif - -#undef __CL_HAS_ANON_STRUCT__ -#undef __CL_ANON_STRUCT__ -#if defined( _WIN32) && defined(_MSC_VER) - #if _MSC_VER >=1500 - #pragma warning( pop ) - #endif -#endif - -#endif /* __CL_PLATFORM_H */ diff --git a/src/CL/cl_va_api_media_sharing_intel.h b/src/CL/cl_va_api_media_sharing_intel.h deleted file mode 100644 index 0e7cd4d..0000000 --- a/src/CL/cl_va_api_media_sharing_intel.h +++ /dev/null @@ -1,160 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - ******************************************************************************/ -/*****************************************************************************\ - -Copyright (c) 2013-2019 Intel Corporation All Rights Reserved. - -THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE -MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -File Name: cl_va_api_media_sharing_intel.h - -Abstract: - -Notes: - -\*****************************************************************************/ - - -#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H -#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/****************************************** -* cl_intel_va_api_media_sharing extension * -*******************************************/ - -#define cl_intel_va_api_media_sharing 1 - -/* error codes */ -#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098 -#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099 -#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100 -#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101 - -/* cl_va_api_device_source_intel */ -#define CL_VA_API_DISPLAY_INTEL 0x4094 - -/* cl_va_api_device_set_intel */ -#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095 -#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096 - -/* cl_context_info */ -#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097 - -/* cl_mem_info */ -#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098 - -/* cl_image_info */ -#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099 - -/* cl_command_type */ -#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A -#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B - -typedef cl_uint cl_va_api_device_source_intel; -typedef cl_uint cl_va_api_device_set_intel; - -extern CL_API_ENTRY cl_int CL_API_CALL 
-clGetDeviceIDsFromVA_APIMediaAdapterINTEL( - cl_platform_id platform, - cl_va_api_device_source_intel media_adapter_type, - void* media_adapter, - cl_va_api_device_set_intel media_adapter_set, - cl_uint num_entries, - cl_device_id* devices, - cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)( - cl_platform_id platform, - cl_va_api_device_source_intel media_adapter_type, - void* media_adapter, - cl_va_api_device_set_intel media_adapter_set, - cl_uint num_entries, - cl_device_id* devices, - cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateFromVA_APIMediaSurfaceINTEL( - cl_context context, - cl_mem_flags flags, - VASurfaceID* surface, - cl_uint plane, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)( - cl_context context, - cl_mem_flags flags, - VASurfaceID* surface, - cl_uint plane, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueAcquireVA_APIMediaSurfacesINTEL( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueReleaseVA_APIMediaSurfacesINTEL( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int (CL_API_CALL 
*clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; - -#ifdef __cplusplus -} -#endif - -#endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */ - diff --git a/src/CL/cl_version.h b/src/CL/cl_version.h deleted file mode 100644 index f38280a..0000000 --- a/src/CL/cl_version.h +++ /dev/null @@ -1,81 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2018-2020 The Khronos Group Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - ******************************************************************************/ - -#ifndef __CL_VERSION_H -#define __CL_VERSION_H - -/* Detect which version to target */ -#if !defined(CL_TARGET_OPENCL_VERSION) -#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)") -#define CL_TARGET_OPENCL_VERSION 220 -#endif -#if CL_TARGET_OPENCL_VERSION != 100 && \ - CL_TARGET_OPENCL_VERSION != 110 && \ - CL_TARGET_OPENCL_VERSION != 120 && \ - CL_TARGET_OPENCL_VERSION != 200 && \ - CL_TARGET_OPENCL_VERSION != 210 && \ - CL_TARGET_OPENCL_VERSION != 220 && \ - CL_TARGET_OPENCL_VERSION != 300 -#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220, 300). 
Defaulting to 220 (OpenCL 2.2)") -#undef CL_TARGET_OPENCL_VERSION -#define CL_TARGET_OPENCL_VERSION 220 -#endif - - -/* OpenCL Version */ -#if CL_TARGET_OPENCL_VERSION >= 300 && !defined(CL_VERSION_3_0) -#define CL_VERSION_3_0 1 -#endif -#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2) -#define CL_VERSION_2_2 1 -#endif -#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1) -#define CL_VERSION_2_1 1 -#endif -#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0) -#define CL_VERSION_2_0 1 -#endif -#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2) -#define CL_VERSION_1_2 1 -#endif -#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1) -#define CL_VERSION_1_1 1 -#endif -#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0) -#define CL_VERSION_1_0 1 -#endif - -/* Allow deprecated APIs for older OpenCL versions. */ -#if CL_TARGET_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS) -#define CL_USE_DEPRECATED_OPENCL_2_2_APIS -#endif -#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) -#define CL_USE_DEPRECATED_OPENCL_2_1_APIS -#endif -#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) -#define CL_USE_DEPRECATED_OPENCL_2_0_APIS -#endif -#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) -#define CL_USE_DEPRECATED_OPENCL_1_2_APIS -#endif -#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -#define CL_USE_DEPRECATED_OPENCL_1_1_APIS -#endif -#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) -#define CL_USE_DEPRECATED_OPENCL_1_0_APIS -#endif - -#endif /* __CL_VERSION_H */ diff --git a/src/CL/opencl.h b/src/CL/opencl.h deleted file mode 100644 index 1c4e10c..0000000 --- a/src/CL/opencl.h +++ /dev/null @@ -1,33 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2020 The Khronos 
Group Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - ******************************************************************************/ - -#ifndef __OPENCL_H -#define __OPENCL_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include -#include - -#ifdef __cplusplus -} -#endif - -#endif /* __OPENCL_H */ diff --git a/src/acc/Makefile b/src/acc/Makefile deleted file mode 100644 index 55fbfbd..0000000 --- a/src/acc/Makefile +++ /dev/null @@ -1,58 +0,0 @@ - -ifndef COMPILER -define compiler_help -Set COMPILER to ensure correct flags are set. -Available compilers are: - PGI GNU -endef -$(info $(compiler_help)) -endif - -COMPILER_ = $(CXX) -COMPILER_PGI = pgc++ -COMPILER_GNU = g++ - -FLAGS_ = -O3 -std=c++11 - -FLAGS_PGI = -std=c++11 -O3 -acc -ifeq ($(COMPILER), PGI) -define target_help -Set a TARGET to ensure PGI targets the correct offload device. 
-Available targets are: - SNB, IVB, HSW, SKL, KNL - PWR9, AMD - KEPLER, MAXWELL, PASCAL, VOLTA - HAWAII -endef -ifndef TARGET -$(error $(target_help)) -endif -TARGET_FLAGS_SNB = -ta=multicore -tp=sandybridge -TARGET_FLAGS_IVB = -ta=multicore -tp=ivybridge -TARGET_FLAGS_HSW = -ta=multicore -tp=haswell -TARGET_FLAGS_SKL = -ta=multicore -tp=skylake -TARGET_FLAGS_KNL = -ta=multicore -tp=knl -TARGET_FLAGS_PWR9 = -ta=multicore -tp=pwr9 -TARGET_FLAGS_AMD = -ta=multicore -tp=zen -TARGET_FLAGS_KEPLER = -ta=nvidia:cc35 -TARGET_FLAGS_MAXWELL = -ta=nvidia:cc50 -TARGET_FLAGS_PASCAL = -ta=nvidia:cc60 -TARGET_FLAGS_VOLTA = -ta=nvidia:cc70 -TARGET_FLAGS_HAWAII = -ta=radeon:hawaii -ifeq ($(TARGET_FLAGS_$(TARGET)),) -$(error $(target_help)) -endif - -FLAGS_PGI += $(TARGET_FLAGS_$(TARGET)) - -endif - -FLAGS_GNU = -O3 -std=c++11 -Drestrict=__restrict -fopenacc -CXXFLAGS = $(FLAGS_$(COMPILER)) - -acc-stream: ../main.cpp ACCStream.cpp - $(COMPILER_$(COMPILER)) $(CXXFLAGS) -DACC $^ $(EXTRA_FLAGS) -o $@ -I. -I.. - -.PHONY: clean -clean: - rm -f acc-stream main.o ACCStream.o diff --git a/src/cuda/Makefile b/src/cuda/Makefile deleted file mode 100644 index 153f07d..0000000 --- a/src/cuda/Makefile +++ /dev/null @@ -1,40 +0,0 @@ -CXXFLAGS=-O3 -CUDA_CXX=nvcc - - -ifndef NVARCH -define nvarch_help -Set NVARCH to select sm_?? version. -Default: sm_60 - -endef -$(info $(nvarch_help)) -NVARCH=sm_60 -endif - - -ifndef MEM -define mem_help -Set MEM to select memory mode. -Available options: - DEFAULT - allocate host and device memory pointers. - MANAGED - use CUDA Managed Memory. - PAGEFAULT - shared memory, only host pointers allocated. - -endef -$(info $(mem_help)) -MEM=DEFAULT -endif - -MEM_MANAGED= -DMANAGED -MEM_PAGEFAULT= -DPAGEFAULT -MEM_MODE = $(MEM_$(MEM)) - - -cuda-stream: ../main.cpp CUDAStream.cu - $(CUDA_CXX) -std=c++11 $(CXXFLAGS) -arch=$(NVARCH) $(MEM_MODE) -DCUDA $^ $(EXTRA_FLAGS) -o $@ -I. -I.. 
- -.PHONY: clean -clean: - rm -f cuda-stream - diff --git a/src/hip/Makefile b/src/hip/Makefile deleted file mode 100644 index 21383b0..0000000 --- a/src/hip/Makefile +++ /dev/null @@ -1,11 +0,0 @@ - -HIP_PATH?= /opt/rocm/hip -HIPCC=$(HIP_PATH)/bin/hipcc - -hip-stream: ../main.cpp HIPStream.cpp - $(HIPCC) $(CXXFLAGS) -O3 -std=c++11 -DHIP $^ $(EXTRA_FLAGS) -o $@ -I. -I.. - -.PHONY: clean -clean: - rm -f hip-stream - diff --git a/java-stream/.gitignore b/src/java/java-stream/.gitignore similarity index 100% rename from java-stream/.gitignore rename to src/java/java-stream/.gitignore diff --git a/java-stream/.mvn/wrapper/maven-wrapper.jar b/src/java/java-stream/.mvn/wrapper/maven-wrapper.jar similarity index 100% rename from java-stream/.mvn/wrapper/maven-wrapper.jar rename to src/java/java-stream/.mvn/wrapper/maven-wrapper.jar diff --git a/java-stream/.mvn/wrapper/maven-wrapper.properties b/src/java/java-stream/.mvn/wrapper/maven-wrapper.properties similarity index 100% rename from java-stream/.mvn/wrapper/maven-wrapper.properties rename to src/java/java-stream/.mvn/wrapper/maven-wrapper.properties diff --git a/java-stream/README.md b/src/java/java-stream/README.md similarity index 100% rename from java-stream/README.md rename to src/java/java-stream/README.md diff --git a/java-stream/mvnw b/src/java/java-stream/mvnw similarity index 100% rename from java-stream/mvnw rename to src/java/java-stream/mvnw diff --git a/java-stream/mvnw.cmd b/src/java/java-stream/mvnw.cmd similarity index 100% rename from java-stream/mvnw.cmd rename to src/java/java-stream/mvnw.cmd diff --git a/java-stream/pom.xml b/src/java/java-stream/pom.xml similarity index 100% rename from java-stream/pom.xml rename to src/java/java-stream/pom.xml diff --git a/java-stream/src/main/java/javastream/FractionalMaths.java b/src/java/java-stream/src/main/java/javastream/FractionalMaths.java similarity index 100% rename from java-stream/src/main/java/javastream/FractionalMaths.java rename to 
src/java/java-stream/src/main/java/javastream/FractionalMaths.java diff --git a/java-stream/src/main/java/javastream/JavaStream.java b/src/java/java-stream/src/main/java/javastream/JavaStream.java similarity index 100% rename from java-stream/src/main/java/javastream/JavaStream.java rename to src/java/java-stream/src/main/java/javastream/JavaStream.java diff --git a/java-stream/src/main/java/javastream/Main.java b/src/java/java-stream/src/main/java/javastream/Main.java similarity index 100% rename from java-stream/src/main/java/javastream/Main.java rename to src/java/java-stream/src/main/java/javastream/Main.java diff --git a/java-stream/src/main/java/javastream/aparapi/AparapiStreams.java b/src/java/java-stream/src/main/java/javastream/aparapi/AparapiStreams.java similarity index 100% rename from java-stream/src/main/java/javastream/aparapi/AparapiStreams.java rename to src/java/java-stream/src/main/java/javastream/aparapi/AparapiStreams.java diff --git a/java-stream/src/main/java/javastream/aparapi/GenericAparapiStreamKernel.java b/src/java/java-stream/src/main/java/javastream/aparapi/GenericAparapiStreamKernel.java similarity index 100% rename from java-stream/src/main/java/javastream/aparapi/GenericAparapiStreamKernel.java rename to src/java/java-stream/src/main/java/javastream/aparapi/GenericAparapiStreamKernel.java diff --git a/java-stream/src/main/java/javastream/aparapi/SpecialisedDoubleKernel.java b/src/java/java-stream/src/main/java/javastream/aparapi/SpecialisedDoubleKernel.java similarity index 100% rename from java-stream/src/main/java/javastream/aparapi/SpecialisedDoubleKernel.java rename to src/java/java-stream/src/main/java/javastream/aparapi/SpecialisedDoubleKernel.java diff --git a/java-stream/src/main/java/javastream/aparapi/SpecialisedFloatKernel.java b/src/java/java-stream/src/main/java/javastream/aparapi/SpecialisedFloatKernel.java similarity index 100% rename from java-stream/src/main/java/javastream/aparapi/SpecialisedFloatKernel.java rename 
to src/java/java-stream/src/main/java/javastream/aparapi/SpecialisedFloatKernel.java diff --git a/java-stream/src/main/java/javastream/jdk/GenericPlainStream.java b/src/java/java-stream/src/main/java/javastream/jdk/GenericPlainStream.java similarity index 100% rename from java-stream/src/main/java/javastream/jdk/GenericPlainStream.java rename to src/java/java-stream/src/main/java/javastream/jdk/GenericPlainStream.java diff --git a/java-stream/src/main/java/javastream/jdk/GenericStream.java b/src/java/java-stream/src/main/java/javastream/jdk/GenericStream.java similarity index 100% rename from java-stream/src/main/java/javastream/jdk/GenericStream.java rename to src/java/java-stream/src/main/java/javastream/jdk/GenericStream.java diff --git a/java-stream/src/main/java/javastream/jdk/JdkStreams.java b/src/java/java-stream/src/main/java/javastream/jdk/JdkStreams.java similarity index 100% rename from java-stream/src/main/java/javastream/jdk/JdkStreams.java rename to src/java/java-stream/src/main/java/javastream/jdk/JdkStreams.java diff --git a/java-stream/src/main/java/javastream/jdk/PlainStream.java b/src/java/java-stream/src/main/java/javastream/jdk/PlainStream.java similarity index 100% rename from java-stream/src/main/java/javastream/jdk/PlainStream.java rename to src/java/java-stream/src/main/java/javastream/jdk/PlainStream.java diff --git a/java-stream/src/main/java/javastream/jdk/SpecialisedDoubleStream.java b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedDoubleStream.java similarity index 100% rename from java-stream/src/main/java/javastream/jdk/SpecialisedDoubleStream.java rename to src/java/java-stream/src/main/java/javastream/jdk/SpecialisedDoubleStream.java diff --git a/java-stream/src/main/java/javastream/jdk/SpecialisedFloatStream.java b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedFloatStream.java similarity index 100% rename from java-stream/src/main/java/javastream/jdk/SpecialisedFloatStream.java rename to 
src/java/java-stream/src/main/java/javastream/jdk/SpecialisedFloatStream.java diff --git a/java-stream/src/main/java/javastream/jdk/SpecialisedPlainDoubleStream.java b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainDoubleStream.java similarity index 100% rename from java-stream/src/main/java/javastream/jdk/SpecialisedPlainDoubleStream.java rename to src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainDoubleStream.java diff --git a/java-stream/src/main/java/javastream/jdk/SpecialisedPlainFloatStream.java b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainFloatStream.java similarity index 100% rename from java-stream/src/main/java/javastream/jdk/SpecialisedPlainFloatStream.java rename to src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainFloatStream.java diff --git a/java-stream/src/main/java/javastream/tornadovm/GenericTornadoVMStream.java b/src/java/java-stream/src/main/java/javastream/tornadovm/GenericTornadoVMStream.java similarity index 100% rename from java-stream/src/main/java/javastream/tornadovm/GenericTornadoVMStream.java rename to src/java/java-stream/src/main/java/javastream/tornadovm/GenericTornadoVMStream.java diff --git a/java-stream/src/main/java/javastream/tornadovm/SpecialisedDouble.java b/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedDouble.java similarity index 100% rename from java-stream/src/main/java/javastream/tornadovm/SpecialisedDouble.java rename to src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedDouble.java diff --git a/java-stream/src/main/java/javastream/tornadovm/SpecialisedFloat.java b/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedFloat.java similarity index 100% rename from java-stream/src/main/java/javastream/tornadovm/SpecialisedFloat.java rename to src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedFloat.java diff --git a/java-stream/src/main/java/javastream/tornadovm/TornadoVMStreams.java 
b/src/java/java-stream/src/main/java/javastream/tornadovm/TornadoVMStreams.java similarity index 100% rename from java-stream/src/main/java/javastream/tornadovm/TornadoVMStreams.java rename to src/java/java-stream/src/main/java/javastream/tornadovm/TornadoVMStreams.java diff --git a/java-stream/src/test/java/javastream/SmokeTest.java b/src/java/java-stream/src/test/java/javastream/SmokeTest.java similarity index 100% rename from java-stream/src/test/java/javastream/SmokeTest.java rename to src/java/java-stream/src/test/java/javastream/SmokeTest.java diff --git a/JuliaStream.jl/.JuliaFormatter.toml b/src/julia/JuliaStream.jl/.JuliaFormatter.toml similarity index 100% rename from JuliaStream.jl/.JuliaFormatter.toml rename to src/julia/JuliaStream.jl/.JuliaFormatter.toml diff --git a/JuliaStream.jl/.gitignore b/src/julia/JuliaStream.jl/.gitignore similarity index 100% rename from JuliaStream.jl/.gitignore rename to src/julia/JuliaStream.jl/.gitignore diff --git a/JuliaStream.jl/AMDGPU/Manifest.toml b/src/julia/JuliaStream.jl/AMDGPU/Manifest.toml similarity index 100% rename from JuliaStream.jl/AMDGPU/Manifest.toml rename to src/julia/JuliaStream.jl/AMDGPU/Manifest.toml diff --git a/JuliaStream.jl/AMDGPU/Project.toml b/src/julia/JuliaStream.jl/AMDGPU/Project.toml similarity index 100% rename from JuliaStream.jl/AMDGPU/Project.toml rename to src/julia/JuliaStream.jl/AMDGPU/Project.toml diff --git a/JuliaStream.jl/CUDA/Manifest.toml b/src/julia/JuliaStream.jl/CUDA/Manifest.toml similarity index 100% rename from JuliaStream.jl/CUDA/Manifest.toml rename to src/julia/JuliaStream.jl/CUDA/Manifest.toml diff --git a/JuliaStream.jl/CUDA/Project.toml b/src/julia/JuliaStream.jl/CUDA/Project.toml similarity index 100% rename from JuliaStream.jl/CUDA/Project.toml rename to src/julia/JuliaStream.jl/CUDA/Project.toml diff --git a/JuliaStream.jl/KernelAbstractions/Manifest.toml b/src/julia/JuliaStream.jl/KernelAbstractions/Manifest.toml similarity index 100% rename from 
JuliaStream.jl/KernelAbstractions/Manifest.toml rename to src/julia/JuliaStream.jl/KernelAbstractions/Manifest.toml diff --git a/JuliaStream.jl/KernelAbstractions/Project.toml b/src/julia/JuliaStream.jl/KernelAbstractions/Project.toml similarity index 100% rename from JuliaStream.jl/KernelAbstractions/Project.toml rename to src/julia/JuliaStream.jl/KernelAbstractions/Project.toml diff --git a/JuliaStream.jl/Manifest.toml b/src/julia/JuliaStream.jl/Manifest.toml similarity index 100% rename from JuliaStream.jl/Manifest.toml rename to src/julia/JuliaStream.jl/Manifest.toml diff --git a/JuliaStream.jl/Project.toml b/src/julia/JuliaStream.jl/Project.toml similarity index 100% rename from JuliaStream.jl/Project.toml rename to src/julia/JuliaStream.jl/Project.toml diff --git a/JuliaStream.jl/README.md b/src/julia/JuliaStream.jl/README.md similarity index 100% rename from JuliaStream.jl/README.md rename to src/julia/JuliaStream.jl/README.md diff --git a/JuliaStream.jl/Threaded/Manifest.toml b/src/julia/JuliaStream.jl/Threaded/Manifest.toml similarity index 100% rename from JuliaStream.jl/Threaded/Manifest.toml rename to src/julia/JuliaStream.jl/Threaded/Manifest.toml diff --git a/JuliaStream.jl/Threaded/Project.toml b/src/julia/JuliaStream.jl/Threaded/Project.toml similarity index 100% rename from JuliaStream.jl/Threaded/Project.toml rename to src/julia/JuliaStream.jl/Threaded/Project.toml diff --git a/JuliaStream.jl/oneAPI/Manifest.toml b/src/julia/JuliaStream.jl/oneAPI/Manifest.toml similarity index 100% rename from JuliaStream.jl/oneAPI/Manifest.toml rename to src/julia/JuliaStream.jl/oneAPI/Manifest.toml diff --git a/JuliaStream.jl/oneAPI/Project.toml b/src/julia/JuliaStream.jl/oneAPI/Project.toml similarity index 100% rename from JuliaStream.jl/oneAPI/Project.toml rename to src/julia/JuliaStream.jl/oneAPI/Project.toml diff --git a/JuliaStream.jl/src/AMDGPUStream.jl b/src/julia/JuliaStream.jl/src/AMDGPUStream.jl similarity index 100% rename from 
JuliaStream.jl/src/AMDGPUStream.jl rename to src/julia/JuliaStream.jl/src/AMDGPUStream.jl diff --git a/JuliaStream.jl/src/CUDAStream.jl b/src/julia/JuliaStream.jl/src/CUDAStream.jl similarity index 100% rename from JuliaStream.jl/src/CUDAStream.jl rename to src/julia/JuliaStream.jl/src/CUDAStream.jl diff --git a/JuliaStream.jl/src/DistributedStream.jl b/src/julia/JuliaStream.jl/src/DistributedStream.jl similarity index 100% rename from JuliaStream.jl/src/DistributedStream.jl rename to src/julia/JuliaStream.jl/src/DistributedStream.jl diff --git a/JuliaStream.jl/src/JuliaStream.jl b/src/julia/JuliaStream.jl/src/JuliaStream.jl similarity index 100% rename from JuliaStream.jl/src/JuliaStream.jl rename to src/julia/JuliaStream.jl/src/JuliaStream.jl diff --git a/JuliaStream.jl/src/KernelAbstractionsStream.jl b/src/julia/JuliaStream.jl/src/KernelAbstractionsStream.jl similarity index 100% rename from JuliaStream.jl/src/KernelAbstractionsStream.jl rename to src/julia/JuliaStream.jl/src/KernelAbstractionsStream.jl diff --git a/JuliaStream.jl/src/PlainStream.jl b/src/julia/JuliaStream.jl/src/PlainStream.jl similarity index 100% rename from JuliaStream.jl/src/PlainStream.jl rename to src/julia/JuliaStream.jl/src/PlainStream.jl diff --git a/JuliaStream.jl/src/Stream.jl b/src/julia/JuliaStream.jl/src/Stream.jl similarity index 100% rename from JuliaStream.jl/src/Stream.jl rename to src/julia/JuliaStream.jl/src/Stream.jl diff --git a/JuliaStream.jl/src/StreamData.jl b/src/julia/JuliaStream.jl/src/StreamData.jl similarity index 100% rename from JuliaStream.jl/src/StreamData.jl rename to src/julia/JuliaStream.jl/src/StreamData.jl diff --git a/JuliaStream.jl/src/ThreadedStream.jl b/src/julia/JuliaStream.jl/src/ThreadedStream.jl similarity index 100% rename from JuliaStream.jl/src/ThreadedStream.jl rename to src/julia/JuliaStream.jl/src/ThreadedStream.jl diff --git a/JuliaStream.jl/src/oneAPIStream.jl b/src/julia/JuliaStream.jl/src/oneAPIStream.jl similarity index 100% rename from 
JuliaStream.jl/src/oneAPIStream.jl rename to src/julia/JuliaStream.jl/src/oneAPIStream.jl diff --git a/JuliaStream.jl/update_all.sh b/src/julia/JuliaStream.jl/update_all.sh similarity index 100% rename from JuliaStream.jl/update_all.sh rename to src/julia/JuliaStream.jl/update_all.sh diff --git a/src/kokkos/Makefile b/src/kokkos/Makefile deleted file mode 100644 index 98d8597..0000000 --- a/src/kokkos/Makefile +++ /dev/null @@ -1,99 +0,0 @@ - -default: kokkos-stream - -ifndef DEVICE -define device_help -Set DEVICE to change flags (defaulting to OpenMP). -Available devices are: - OpenMP, Serial, Pthreads, Cuda, HIP - -endef -$(info $(device_help)) -DEVICE="OpenMP" -endif -KOKKOS_DEVICES="$(DEVICE)" - -ifndef ARCH -define arch_help -Set ARCH to change flags (defaulting to empty). -Available architectures are: - AMDAVX - ARMv80 ARMv81 ARMv8-ThunderX - BGQ Power7 Power8 Power9 - WSM SNB HSW BDW SKX KNC KNL - Kepler30 Kepler32 Kepler35 Kepler37 - Maxwell50 Maxwell52 Maxwell53 - Pascal60 Pascal61 - Volta70 Volta72 - -endef -$(info $(arch_help)) -ARCH="" -endif -KOKKOS_ARCH="$(ARCH)" - -ifndef COMPILER -define compiler_help -Set COMPILER to change flags (defaulting to GNU). -Available compilers are: - GNU INTEL CRAY PGI ARMCLANG HIPCC - - Note: you may have to do `export CXX=\path\to\hipcc` in case Kokkos detects the wrong compiler - -endef -$(info $(compiler_help)) -COMPILER=GNU -endif - -COMPILER_ARMCLANG = armclang++ -COMPILER_HIPCC = hipcc -COMPILER_GNU = g++ -COMPILER_INTEL = icpc -qopt-streaming-stores=always -COMPILER_CRAY = CC -COMPILER_PGI = pgc++ -CXX = $(COMPILER_$(COMPILER)) - -ifndef TARGET -define target_help -Set TARGET to change to offload device. Defaulting to CPU. 
-Available targets are: - CPU (default) - GPU - -endef -$(info $(target_help)) -TARGET=CPU -endif - -ifeq ($(TARGET), GPU) -ifneq ($(COMPILER), HIPCC) -CXX = $(NVCC_WRAPPER) -endif -endif - -OBJ = KokkosStream.o -CXXFLAGS = -O3 -LINKFLAGS = # empty for now - - - -ifeq ($(COMPILER), GNU) -ifeq ($(DEVICE), OpenMP) -CXXFLAGS += -fopenmp -LINKFLAGS += -fopenmp -endif -endif - -include $(KOKKOS_PATH)/Makefile.kokkos -HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp) - -kokkos-stream: ../main.cpp $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(CXX) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -DKOKKOS -o $@ -I. -I.. - -%.o: %.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS) - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) - -.PHONY: clean -clean: - rm -f kokkos-stream main.o KokkosStream.o Kokkos_*.o KokkosCore_* - diff --git a/legacy/HCStream.cpp b/src/legacy/HCStream.cpp similarity index 100% rename from legacy/HCStream.cpp rename to src/legacy/HCStream.cpp diff --git a/legacy/HCStream.h b/src/legacy/HCStream.h similarity index 100% rename from legacy/HCStream.h rename to src/legacy/HCStream.h diff --git a/src/CL/cl2.hpp b/src/ocl/CL/cl2.hpp similarity index 100% rename from src/CL/cl2.hpp rename to src/ocl/CL/cl2.hpp diff --git a/src/ocl/Makefile b/src/ocl/Makefile deleted file mode 100644 index 20cd257..0000000 --- a/src/ocl/Makefile +++ /dev/null @@ -1,39 +0,0 @@ - -ifndef COMPILER -define compiler_help -Set COMPILER to change flags (defaulting to GNU). 
-Available compilers are: - GNU CLANG INTEL CRAY - -endef -$(info $(compiler_help)) -COMPILER=GNU -endif - -COMPILER_GNU = g++ -COMPILER_CLANG = clang++ -COMPILER_INTEL = icpc -COMPILER_CRAY = CC -CXX = $(COMPILER_$(COMPILER)) - -FLAGS_ = -O3 -std=c++11 -FLAGS_GNU = -O3 -std=c++11 -FLAGS_CLANG = -O3 -std=c++11 -FLAGS_INTEL = -O3 -std=c++11 -FLAGS_CRAY = -O3 -hstd=c++11 -CXXFLAGS=$(FLAGS_$(COMPILER)) - -PLATFORM = $(shell uname -s) -ifeq ($(PLATFORM), Darwin) - LIBS = -framework OpenCL -else - LIBS = -lOpenCL -endif - -ocl-stream: ../main.cpp OCLStream.cpp - $(CXX) $(CXXFLAGS) -DOCL $^ $(EXTRA_FLAGS) $(LIBS) -o $@ -I. -I.. - -.PHONY: clean -clean: - rm -f ocl-stream - diff --git a/src/omp/Makefile b/src/omp/Makefile deleted file mode 100644 index 15bab8a..0000000 --- a/src/omp/Makefile +++ /dev/null @@ -1,103 +0,0 @@ - -ifndef COMPILER -define compiler_help -Set COMPILER to change flags (defaulting to GNU). -Available compilers are: - CLANG CRAY GNU GNU_PPC INTEL XL PGI - NEC ARMCLANG AOMP FUJITSU - -Note: GCC on PPC requires -mcpu=native instead of -march=native so we have a special case for it - -endef -$(info $(compiler_help)) -COMPILER=GNU -endif - -ifndef TARGET -define target_help -Set TARGET to change device (defaulting to CPU). -Available targets are: - CPU NVIDIA AMD INTEL_GPU - -endef -$(info $(target_help)) -TARGET=CPU -endif - -ifeq ("$(COMPILER)", "CLANG") - ifdef TARGET - ifeq ("$(TARGET)", "NVIDIA") - ifndef NVARCH - define nvarch_help - Set NVARCH to select sm_?? version. 
- Default: sm_60 - - endef - $(info $(nvarch_help)) - NVARCH=sm_60 - endif - endif - endif -endif - -COMPILER_ARMCLANG = armclang++ -COMPILER_GNU = g++ -COMPILER_GNU_PPC = g++ -COMPILER_INTEL = icpc -COMPILER_CRAY = CC -COMPILER_CLANG = clang++ -COMPILER_XL = xlc++ -COMPILER_PGI = pgc++ -COMPILER_NEC = /opt/nec/ve/bin/nc++ -COMPILER_AOMP = clang++ -COMPILER_FUJITSU=FCC -CXX = $(COMPILER_$(COMPILER)) - -FLAGS_GNU = -O3 -std=c++11 -march=native -FLAGS_GNU_PPC = -O3 -std=c++11 -mcpu=native -FLAGS_INTEL = -O3 -std=c++11 -FLAGS_CRAY = -O3 -std=c++11 -FLAGS_CLANG = -O3 -std=c++11 -FLAGS_XL = -O5 -qarch=auto -qtune=auto -std=c++11 -FLAGS_PGI = -O3 -std=c++11 -FLAGS_NEC = -O4 -finline -std=c++11 -FLAGS_ARMCLANG = -O3 -std=c++11 -FLAGS_AOMP = -O3 -std=c++11 -FLAGS_FUJITSU=-Kfast -std=c++11 -KA64FX -KSVE -KARMV8_3_A -Kzfill=100 -Kprefetch_sequential=soft -Kprefetch_line=8 -Kprefetch_line_L2=16 -CXXFLAGS = $(FLAGS_$(COMPILER)) - -# OpenMP flags for CPUs -OMP_ARMCLANG_CPU = -fopenmp -OMP_GNU_CPU = -fopenmp -OMP_GNU_PPC_CPU = -fopenmp -OMP_INTEL_CPU = -qopenmp -OMP_CRAY_CPU = -fopenmp -OMP_CLANG_CPU = -fopenmp=libomp -OMP_XL_CPU = -qsmp=omp -qthreaded -OMP_PGI_CPU = -mp -OMP_NEC_CPU = -fopenmp -OMP_FUJITSU_CPU=-Kopenmp - -# OpenMP flags for NVIDIA -OMP_CRAY_NVIDIA = -DOMP_TARGET_GPU -OMP_CLANG_NVIDIA = -DOMP_TARGET_GPU -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=$(NVARCH) -OMP_GNU_NVIDIA = -DOMP_TARGET_GPU -fopenmp -foffload=nvptx-none -OMP_GNU_AMD = -DOMP_TARGET_GPU -fopenmp -foffload=amdgcn-amdhsa - -OMP_INTEL_CPU = -xHOST -qopt-streaming-stores=always -qopenmp -OMP_INTEL_INTEL_GPU = -DOMP_TARGET_GPU -qnextgen -fiopenmp -fopenmp-targets=spir64 - -OMP_AOMP_GPU = -DOMP_TARGET_GPU -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 - -ifndef OMP_$(COMPILER)_$(TARGET) -$(error Targeting $(TARGET) with $(COMPILER) not supported) -endif - -OMP = $(OMP_$(COMPILER)_$(TARGET)) - -omp-stream: ../main.cpp 
OMPStream.cpp - $(CXX) $(CXXFLAGS) -DOMP $^ $(OMP) $(EXTRA_FLAGS) -o $@ -I. -I.. - -.PHONY: clean -clean: - rm -f omp-stream diff --git a/src/raja/Makefile b/src/raja/Makefile deleted file mode 100644 index 60f2319..0000000 --- a/src/raja/Makefile +++ /dev/null @@ -1,58 +0,0 @@ - -ifndef TARGET -define target_help -Set TARGET to change to offload device. Defaulting to CPU. -Available targets are: - CPU (default) - GPU -endef -$(info $(target_help)) -TARGET=CPU -endif - -ifeq ($(TARGET), CPU) - -ifndef COMPILER -define compiler_help -Set COMPILER to change flags (defaulting to GNU). -Available compilers are: - INTEL GNU CRAY XL -endef -$(info $(compiler_help)) -COMPILER=GNU -endif - -CXX_INTEL = icpc -CXX_GNU = g++ -CXX_CRAY = CC -CXX_XL = xlc++ - -CXXFLAGS_INTEL = -O3 -std=c++11 -qopenmp -xHost -qopt-streaming-stores=always -CXXFLAGS_GNU = -O3 -std=c++11 -fopenmp -CXXFLAGS_CRAY = -O3 -hstd=c++11 -CXXFLAGS_XL = -O5 -std=c++11 -qarch=pwr8 -qtune=pwr8 -qsmp=omp -qthreaded - -CXX = $(CXX_$(COMPILER)) -CXXFLAGS = -DRAJA_TARGET_CPU $(CXXFLAGS_$(COMPILER)) - -else ifeq ($(TARGET), GPU) -CXX = nvcc - -ifndef ARCH -define arch_help -Set ARCH to ensure correct GPU architecture. -Example: - ARCH=sm_35 -endef -$(error $(arch_help)) -endif -CXXFLAGS = --expt-extended-lambda -O3 -std=c++11 -x cu -Xcompiler -fopenmp -arch $(ARCH) -endif - -raja-stream: ../main.cpp RAJAStream.cpp - $(CXX) $(CXXFLAGS) -DUSE_RAJA -I$(RAJA_PATH)/include $^ $(EXTRA_FLAGS) -L$(RAJA_PATH)/lib -lRAJA -o $@ -I. -I.. 
- -.PHONY: clean -clean: - rm -f raja-stream - diff --git a/scala-stream/.bsp/sbt.json b/src/scala/scala-stream/.bsp/sbt.json similarity index 100% rename from scala-stream/.bsp/sbt.json rename to src/scala/scala-stream/.bsp/sbt.json diff --git a/scala-stream/.gitignore b/src/scala/scala-stream/.gitignore similarity index 100% rename from scala-stream/.gitignore rename to src/scala/scala-stream/.gitignore diff --git a/scala-stream/.jvmopts b/src/scala/scala-stream/.jvmopts similarity index 100% rename from scala-stream/.jvmopts rename to src/scala/scala-stream/.jvmopts diff --git a/scala-stream/.scalafmt.conf b/src/scala/scala-stream/.scalafmt.conf similarity index 100% rename from scala-stream/.scalafmt.conf rename to src/scala/scala-stream/.scalafmt.conf diff --git a/scala-stream/README.md b/src/scala/scala-stream/README.md similarity index 100% rename from scala-stream/README.md rename to src/scala/scala-stream/README.md diff --git a/scala-stream/build.sbt b/src/scala/scala-stream/build.sbt similarity index 100% rename from scala-stream/build.sbt rename to src/scala/scala-stream/build.sbt diff --git a/scala-stream/project/build.properties b/src/scala/scala-stream/project/build.properties similarity index 100% rename from scala-stream/project/build.properties rename to src/scala/scala-stream/project/build.properties diff --git a/scala-stream/project/plugins.sbt b/src/scala/scala-stream/project/plugins.sbt similarity index 100% rename from scala-stream/project/plugins.sbt rename to src/scala/scala-stream/project/plugins.sbt diff --git a/scala-stream/reflect-config.json b/src/scala/scala-stream/reflect-config.json similarity index 100% rename from scala-stream/reflect-config.json rename to src/scala/scala-stream/reflect-config.json diff --git a/scala-stream/sbt b/src/scala/scala-stream/sbt similarity index 100% rename from scala-stream/sbt rename to src/scala/scala-stream/sbt diff --git a/scala-stream/sbt-dist/bin/java9-rt-export.jar 
b/src/scala/scala-stream/sbt-dist/bin/java9-rt-export.jar similarity index 100% rename from scala-stream/sbt-dist/bin/java9-rt-export.jar rename to src/scala/scala-stream/sbt-dist/bin/java9-rt-export.jar diff --git a/scala-stream/sbt-dist/bin/sbt b/src/scala/scala-stream/sbt-dist/bin/sbt similarity index 100% rename from scala-stream/sbt-dist/bin/sbt rename to src/scala/scala-stream/sbt-dist/bin/sbt diff --git a/scala-stream/sbt-dist/bin/sbt-launch-lib.bash b/src/scala/scala-stream/sbt-dist/bin/sbt-launch-lib.bash similarity index 100% rename from scala-stream/sbt-dist/bin/sbt-launch-lib.bash rename to src/scala/scala-stream/sbt-dist/bin/sbt-launch-lib.bash diff --git a/scala-stream/sbt-dist/bin/sbt-launch.jar b/src/scala/scala-stream/sbt-dist/bin/sbt-launch.jar similarity index 100% rename from scala-stream/sbt-dist/bin/sbt-launch.jar rename to src/scala/scala-stream/sbt-dist/bin/sbt-launch.jar diff --git a/scala-stream/sbt-dist/bin/sbt.bat b/src/scala/scala-stream/sbt-dist/bin/sbt.bat similarity index 100% rename from scala-stream/sbt-dist/bin/sbt.bat rename to src/scala/scala-stream/sbt-dist/bin/sbt.bat diff --git a/scala-stream/sbt-dist/conf/sbtconfig.txt b/src/scala/scala-stream/sbt-dist/conf/sbtconfig.txt similarity index 100% rename from scala-stream/sbt-dist/conf/sbtconfig.txt rename to src/scala/scala-stream/sbt-dist/conf/sbtconfig.txt diff --git a/scala-stream/sbt-dist/conf/sbtopts b/src/scala/scala-stream/sbt-dist/conf/sbtopts similarity index 100% rename from scala-stream/sbt-dist/conf/sbtopts rename to src/scala/scala-stream/sbt-dist/conf/sbtopts diff --git a/scala-stream/src/main/scala/scalastream/J8SStream.scala b/src/scala/scala-stream/src/main/scala/scalastream/J8SStream.scala similarity index 100% rename from scala-stream/src/main/scala/scalastream/J8SStream.scala rename to src/scala/scala-stream/src/main/scala/scalastream/J8SStream.scala diff --git a/scala-stream/src/main/scala/scalastream/ParStream.scala 
b/src/scala/scala-stream/src/main/scala/scalastream/ParStream.scala similarity index 100% rename from scala-stream/src/main/scala/scalastream/ParStream.scala rename to src/scala/scala-stream/src/main/scala/scalastream/ParStream.scala diff --git a/scala-stream/src/main/scala/scalastream/PlainStream.scala b/src/scala/scala-stream/src/main/scala/scalastream/PlainStream.scala similarity index 100% rename from scala-stream/src/main/scala/scalastream/PlainStream.scala rename to src/scala/scala-stream/src/main/scala/scalastream/PlainStream.scala diff --git a/scala-stream/src/main/scala/scalastream/ScalaStream.scala b/src/scala/scala-stream/src/main/scala/scalastream/ScalaStream.scala similarity index 100% rename from scala-stream/src/main/scala/scalastream/ScalaStream.scala rename to src/scala/scala-stream/src/main/scala/scalastream/ScalaStream.scala diff --git a/scala-stream/src/main/scala/scalastream/ThreadStream.scala b/src/scala/scala-stream/src/main/scala/scalastream/ThreadStream.scala similarity index 100% rename from scala-stream/src/main/scala/scalastream/ThreadStream.scala rename to src/scala/scala-stream/src/main/scala/scalastream/ThreadStream.scala diff --git a/src/std/Makefile b/src/std/Makefile deleted file mode 100644 index a5a8847..0000000 --- a/src/std/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# -# For full license terms please see the LICENSE file distributed with this -# source code - -CXXFLAGS=-O3 -std=c++17 -stdpar -DSTD -STD_CXX=nvc++ - -std-stream: ../main.cpp STDStream.cpp - $(STD_CXX) $(CXXFLAGS) $^ $(EXTRA_FLAGS) -o $@ -I. -I.. - -.PHONY: clean -clean: - rm -f std-stream diff --git a/src/std20/Makefile b/src/std20/Makefile deleted file mode 100644 index 3a93bcb..0000000 --- a/src/std20/Makefile +++ /dev/null @@ -1,26 +0,0 @@ - -ifndef COMPILER -define compiler_help -Set COMPILER to change flags (defaulting to GNU). 
-Available compilers are: - GNU - -endef -$(info $(compiler_help)) -COMPILER=GNU -endif - -COMPILER_GNU = g++ -CXX = $(COMPILER_$(COMPILER)) - -FLAGS_GNU = -O3 -std=c++2a -march=native -CXXFLAGS = $(FLAGS_$(COMPILER)) - - -std20-stream: ../main.cpp STD20Stream.cpp - $(CXX) -DSTD20 $(CXXFLAGS) $^ $(EXTRA_FLAGS) -o $@ -I. -I.. - -.PHONY: clean -clean: - rm -f std20-stream - diff --git a/src/sycl/Makefile b/src/sycl/Makefile deleted file mode 100644 index 05d2022..0000000 --- a/src/sycl/Makefile +++ /dev/null @@ -1,81 +0,0 @@ -ifndef COMPILER -define compiler_help -Set COMPILER to change flags (defaulting to GNU). -Available compilers are: - HIPSYCL, DPCPP, COMPUTECPP - - - For HIPSYCL and COMPUTECPP, SYCL_SDK_DIR must be specified, the directory should contain [/lib, /bin, ...] - For DPCPP, the compiler must be on path -endef -$(info $(compiler_help)) -COMPILER=HIPSYCL -endif - -ifndef TARGET -define target_help -Set TARGET to change device (defaulting to CPU). -Available targets are: - CPU AMD NVIDIA - -endef -$(info $(target_help)) -TARGET=CPU -endif - - -ifndef ARCH -define arch_help -Set ARCH to change device (defaulting to ""). -(GPU *only*) Available targets for HIPSYCL are: - For CUDA, the architecture has the form sm_XX, e.g. sm_60 for Pascal. - For ROCm, the architecture has the form gfxYYY, e.g. gfx900 for Vega 10, gfx906 for Vega 20. 
- -endef - -ifeq ($(COMPILER), HIPSYCL) -ifneq ($(TARGET), CPU) -$(info $(arch_help)) -ARCH= -endif -endif - -endif - -SYCL_COMPUTECPP_SYCLFLAGS = $(shell $(SYCL_SDK_DIR)/bin/computecpp_info --dump-device-compiler-flags) -no-serial-memop -sycl-driver -SYCL_COMPUTECPP_SYCLFLAGS_CPU = $(SYCL_COMPUTECPP_SYCLFLAGS) -SYCL_COMPUTECPP_SYCLFLAGS_AMD = $(SYCL_COMPUTECPP_SYCLFLAGS) -SYCL_COMPUTECPP_SYCLFLAGS_NVIDIA = $(SYCL_COMPUTECPP_SYCLFLAGS) -sycl-target ptx64 -SYCL_COMPUTECPP_SYCLCXX = $(SYCL_SDK_DIR)/bin/compute++ -SYCL_COMPUTECPP_FLAGS = -O3 -std=c++17 -SYCL_COMPUTECPP_LINK_FLAGS = -Wl,-rpath=$(SYCL_SDK_DIR)/lib/ $(SYCL_SDK_DIR)/lib/libComputeCpp.so -lOpenCL -SYCL_COMPUTECPP_INCLUDE = -I$(SYCL_SDK_DIR)/include - -SYCL_HIPSYCL_SYCLFLAGS_CPU = --hipsycl-platform=cpu -SYCL_HIPSYCL_SYCLFLAGS_AMD = --hipsycl-platform=rocm --hipsycl-gpu-arch=$(ARCH) -SYCL_HIPSYCL_SYCLFLAGS_NVIDIA = --hipsycl-platform=cuda --hipsycl-gpu-arch=$(ARCH) -SYCL_HIPSYCL_SYCLCXX = $(SYCL_SDK_DIR)/bin/syclcc -SYCL_HIPSYCL_FLAGS = -O3 --std=c++17 -SYCL_HIPSYCL_LINK_FLAGS = -L$(SYCL_SDK_DIR)/lib -Wl,-rpath,$(SYCL_SDK_DIR)/lib -SYCL_HIPSYCL_INCLUDE = - -SYCL_DPCPP_SYCLFLAGS_NVIDIA = -fsycl -fsycl-targets=nvptx64-nvidia-cuda-sycldevice -fsycl-unnamed-lambda -SYCL_DPCPP_SYCLCXX = dpcpp -SYCL_DPCPP_FLAGS = -O3 --std=c++17 -SYCL_DPCPP_LINK_FLAGS = -SYCL_DPCPP_INCLUDE = - - -SYCL_SYCLFLAGS = $(SYCL_$(COMPILER)_SYCLFLAGS_$(TARGET)) -SYCL_SYCLCXX = $(SYCL_$(COMPILER)_SYCLCXX) -SYCL_FLAGS = $(SYCL_$(COMPILER)_FLAGS) -SYCL_LINK_FLAGS = $(SYCL_$(COMPILER)_LINK_FLAGS) -SYCL_INCLUDE = $(SYCL_$(COMPILER)_INCLUDE) - -# only ComputeCpp generates .sycl files which is a bit odd to deal with so we opted to compile everything together -sycl-stream: ../main.cpp SYCLStream.cpp - $(SYCL_SYCLCXX) $(SYCL_SYCLFLAGS) $(SYCL_FLAGS) $(SYCL_INCLUDE) -DSYCL $(EXTRA_FLAGS) $(SYCL_LINK_FLAGS) $^ -o $@ -I. -I.. 
- -.PHONY: clean -clean: - rm -f sycl-stream diff --git a/TBB.cmake b/src/tbb/TBB.cmake similarity index 100% rename from TBB.cmake rename to src/tbb/TBB.cmake diff --git a/TBBStream.cpp b/src/tbb/TBBStream.cpp similarity index 100% rename from TBBStream.cpp rename to src/tbb/TBBStream.cpp diff --git a/TBBStream.hpp b/src/tbb/TBBStream.hpp similarity index 100% rename from TBBStream.hpp rename to src/tbb/TBBStream.hpp diff --git a/THRUST.cmake b/src/thrust/THRUST.cmake similarity index 100% rename from THRUST.cmake rename to src/thrust/THRUST.cmake diff --git a/ThrustStream.cu b/src/thrust/ThrustStream.cu similarity index 100% rename from ThrustStream.cu rename to src/thrust/ThrustStream.cu diff --git a/ThrustStream.h b/src/thrust/ThrustStream.h similarity index 100% rename from ThrustStream.h rename to src/thrust/ThrustStream.h From 565c8c7f95a57835f4900d0d257a72688efb6236 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Tue, 30 Nov 2021 19:03:04 +0000 Subject: [PATCH 08/10] Fix cache path --- .github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 423064a..0d6b736 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -59,7 +59,7 @@ jobs: uses: actions/cache@v2 with: path: compilers - key: ${{ runner.os }}-${{ hashFiles('./src/ci-prepare-bionic.sh') }} + key: ${{ runner.os }}-${{ hashFiles('./ci-prepare-bionic.sh') }} - name: Prepare compilers if: steps.prepare-compilers.outputs.cache-hit != 'true' From 53f3b7b8a4be7481502916b6da319a83220ea923 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Wed, 1 Dec 2021 16:19:50 +0000 Subject: [PATCH 09/10] Fetch CL headers on-demand for ComputeCpp and OCL build Minor CI adjustments for act Fix TBB and Thrust cmake builds --- .github/workflows/main.yaml | 1 + src/CMakeLists.txt | 18 ++++++++++++++++-- src/ci-prepare-bionic.sh | 24 +++++++++++++++--------- src/ocl/model.cmake | 3 +-- src/sycl/SYCLStream.h | 10 
---------- src/sycl/model.cmake | 3 +-- src/tbb/{TBB.cmake => model.cmake} | 0 src/thrust/{THRUST.cmake => model.cmake} | 0 8 files changed, 34 insertions(+), 25 deletions(-) rename src/tbb/{TBB.cmake => model.cmake} (100%) rename src/thrust/{THRUST.cmake => model.cmake} (100%) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 0d6b736..9b7489b 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -55,6 +55,7 @@ jobs: - uses: actions/checkout@v2 - name: Cache compiler + if: ${{ !env.ACT }} id: prepare-compilers uses: actions/cache@v2 with: diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 81fa78b..760bbd6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,7 +7,21 @@ project(BabelStream VERSION 3.5 LANGUAGES CXX) # some nicer defaults for standard C++ set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_STANDARD_REQUIRED ON) +include(FetchContent) +FetchContent_Declare( + opencl_header + URL https://github.com/KhronosGroup/OpenCL-Headers/archive/refs/tags/v2021.06.30.zip + URL_HASH MD5=af7ab7918a6a11c60370c8651a9f0192 +) + +macro(setup_opencl_header_includes) + FetchContent_GetProperties(opencl_header) + if (NOT opencl_header_POPULATED) + FetchContent_Populate(opencl_header) + set(OpenCL_INCLUDE_DIR ${opencl_header_SOURCE_DIR}) + endif () +endmacro() #set(MODEL SYCL) #set(SYCL_COMPILER COMPUTECPP) @@ -149,7 +163,7 @@ message(STATUS "Default ${CMAKE_BUILD_TYPE} flags are `${DEFAULT_${BUILD_TYPE}_F # setup common build flag defaults if there are no overrides if (NOT DEFINED ${BUILD_TYPE}_FLAGS) set(ACTUAL_${BUILD_TYPE}_FLAGS ${DEFAULT_${BUILD_TYPE}_FLAGS}) - elseif() +elseif () set(ACTUAL_${BUILD_TYPE}_FLAGS ${${BUILD_TYPE}_FLAGS}) endif () @@ -192,4 +206,4 @@ if (COMMAND setup_target) setup_target(${EXE_NAME}) endif () -install (TARGETS ${EXE_NAME} DESTINATION bin) \ No newline at end of file +install(TARGETS ${EXE_NAME} DESTINATION bin) \ No newline at end of file diff --git a/src/ci-prepare-bionic.sh 
b/src/ci-prepare-bionic.sh index 7294905..b77f404 100755 --- a/src/ci-prepare-bionic.sh +++ b/src/ci-prepare-bionic.sh @@ -225,10 +225,7 @@ setup_tbb() { setup_clang_gcc() { - echo "deb http://archive.ubuntu.com/ubuntu focal main universe" | sudo tee -a /etc/apt/sources.list - - sudo apt-get update -qq - sudo apt-get install -y -qq gcc-10-offload-nvptx gcc-10-offload-amdgcn libtbb2 libtbb-dev g++-10 + sudo apt-get install -y -qq gcc-10-offload-nvptx gcc-10-offload-amdgcn libtbb2 libtbb-dev g++-10 clang export_var GCC_CXX "$(which g++-10)" verify_bin_exists "$GCC_CXX" @@ -251,9 +248,6 @@ setup_clang_gcc() { } setup_rocm() { - wget -q -O - "https://repo.radeon.com/rocm/rocm.gpg.key" | sudo apt-key add - - echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/4.5 ubuntu main' | sudo tee /etc/apt/sources.list.d/rocm.list - sudo apt-get update -qq sudo apt-get install -y -qq rocm-dev rocthrust-dev export_var ROCM_PATH "/opt/rocm" export_var PATH "$ROCM_PATH/bin:$PATH" # ROCm needs this for many of their libraries' CMake build to work @@ -320,9 +314,21 @@ if [ "${GITHUB_ACTIONS:-false}" = true ]; then echo "Running in GitHub Actions, defaulting to special export" TERM=xterm export TERM=xterm + + # drop the lock in case we got one from a failed run + rm /var/lib/dpkg/lock-frontend || true + rm /var/cache/apt/archives/lock || true + + wget -q -O - "https://repo.radeon.com/rocm/rocm.gpg.key" | sudo apt-key add - + echo "deb http://archive.ubuntu.com/ubuntu focal main universe" | sudo tee -a /etc/apt/sources.list + echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/4.5 ubuntu main' | sudo tee /etc/apt/sources.list.d/rocm.list + + sudo apt-get update -qq + sudo apt-get install -y -qq cmake + if [ "$SETUP" = true ]; then - echo "Deleting extra packages for space in 5 seconds..." - sleep 5 + echo "Deleting extra packages for space in 2 seconds..." 
+ sleep 2 echo "Starting apt-get remove:" sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel sudo apt-get autoremove -y diff --git a/src/ocl/model.cmake b/src/ocl/model.cmake index 2be3981..acefe71 100644 --- a/src/ocl/model.cmake +++ b/src/ocl/model.cmake @@ -9,8 +9,7 @@ register_flag_optional(OpenCL_LIBRARY macro(setup) - # don't point to the CL dir as the imports already have the CL prefix - set(OpenCL_INCLUDE_DIR "${CMAKE_SOURCE_DIR}") + setup_opencl_header_includes() find_package(OpenCL REQUIRED) register_link_library(OpenCL::OpenCL) endmacro() diff --git a/src/sycl/SYCLStream.h b/src/sycl/SYCLStream.h index dd13387..d3fa18d 100644 --- a/src/sycl/SYCLStream.h +++ b/src/sycl/SYCLStream.h @@ -10,16 +10,6 @@ #include #include "Stream.h" - -#include "CL/opencl.h" - -// XXX Intel's SYCL impl. needs CL_MEM_CHANNEL_INTEL which is provided in dpcpp's include dir -// however, depending the system configuration, the system CL header sometimes takes precedence -// we only really need this macro to refer to the extension so this is probably OK -#ifndef CL_MEM_CHANNEL_INTEL -#define CL_MEM_CHANNEL_INTEL 0x4213 -#endif - #include "CL/sycl.hpp" #define IMPLEMENTATION_STRING "SYCL" diff --git a/src/sycl/model.cmake b/src/sycl/model.cmake index c0c6c3f..e7b5a1c 100644 --- a/src/sycl/model.cmake +++ b/src/sycl/model.cmake @@ -47,8 +47,7 @@ macro(setup) list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules) set(ComputeCpp_DIR ${SYCL_COMPILER_DIR}) - # don't point to the CL dir as the imports already have the CL prefix - set(OpenCL_INCLUDE_DIR "${CMAKE_SOURCE_DIR}") + setup_opencl_header_includes() register_definitions(CL_TARGET_OPENCL_VERSION=220 _GLIBCXX_USE_CXX11_ABI=0) # ComputeCpp needs OpenCL diff --git a/src/tbb/TBB.cmake b/src/tbb/model.cmake similarity index 100% rename from src/tbb/TBB.cmake rename to src/tbb/model.cmake diff --git a/src/thrust/THRUST.cmake b/src/thrust/model.cmake similarity index 100% 
rename from src/thrust/THRUST.cmake rename to src/thrust/model.cmake From 7bf0b212d9bf18c6e236feebd533afc98859c5af Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Wed, 1 Dec 2021 16:40:05 +0000 Subject: [PATCH 10/10] Fix CI missing omp library Fix thrust model typo --- src/CMakeLists.txt | 2 +- src/ci-prepare-bionic.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 760bbd6..553bfcb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -127,7 +127,7 @@ register_model(acc ACC ACCStream.cpp) # defining RAJA collides with the RAJA namespace so USE_RAJA register_model(raja USE_RAJA RAJAStream.cpp) register_model(tbb TBB TBBStream.cpp) -register_model(thurst THRUST ThrustStream.cu) # Thrust uses cu, even for rocThrust +register_model(thrust THRUST ThrustStream.cu) # Thrust uses cu, even for rocThrust set(USAGE ON CACHE BOOL "Whether to print all custom flags for the selected model") diff --git a/src/ci-prepare-bionic.sh b/src/ci-prepare-bionic.sh index b77f404..656d338 100755 --- a/src/ci-prepare-bionic.sh +++ b/src/ci-prepare-bionic.sh @@ -225,7 +225,7 @@ setup_tbb() { setup_clang_gcc() { - sudo apt-get install -y -qq gcc-10-offload-nvptx gcc-10-offload-amdgcn libtbb2 libtbb-dev g++-10 clang + sudo apt-get install -y -qq gcc-10-offload-nvptx gcc-10-offload-amdgcn libtbb2 libtbb-dev g++-10 clang libomp-dev export_var GCC_CXX "$(which g++-10)" verify_bin_exists "$GCC_CXX"