Merge pull request #102 from UoB-HPC/top-level-src
Move all C++ impl. to ./cpp and remove Makefiles
This commit is contained in:
commit
4ab6eac3a5
14
.github/workflows/main.yaml
vendored
14
.github/workflows/main.yaml
vendored
@ -8,12 +8,12 @@ jobs:
|
|||||||
runs-on: ubuntu-18.04
|
runs-on: ubuntu-18.04
|
||||||
defaults:
|
defaults:
|
||||||
run:
|
run:
|
||||||
working-directory: ./java-stream
|
working-directory: ./src/java/java-stream
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
- name: Test build project
|
- name: Test build project
|
||||||
run: ./mvnw clean package
|
run: ./mvnw clean package
|
||||||
- name: Test run
|
- name: Test run
|
||||||
if: ${{ ! cancelled() }}
|
if: ${{ ! cancelled() }}
|
||||||
run: java -jar target/java-stream.jar --arraysize 2048
|
run: java -jar target/java-stream.jar --arraysize 2048
|
||||||
|
|
||||||
@ -21,7 +21,7 @@ jobs:
|
|||||||
runs-on: ubuntu-18.04
|
runs-on: ubuntu-18.04
|
||||||
defaults:
|
defaults:
|
||||||
run:
|
run:
|
||||||
working-directory: ./JuliaStream.jl
|
working-directory: ./src/julia/JuliaStream.jl
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
- name: Setup project
|
- name: Setup project
|
||||||
@ -46,17 +46,21 @@ jobs:
|
|||||||
run: julia --project src/AMDGPUStream.jl --list
|
run: julia --project src/AMDGPUStream.jl --list
|
||||||
|
|
||||||
|
|
||||||
test:
|
test-cpp:
|
||||||
runs-on: ubuntu-18.04
|
runs-on: ubuntu-18.04
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
working-directory: ./src
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
|
|
||||||
- name: Cache compiler
|
- name: Cache compiler
|
||||||
|
if: ${{ !env.ACT }}
|
||||||
id: prepare-compilers
|
id: prepare-compilers
|
||||||
uses: actions/cache@v2
|
uses: actions/cache@v2
|
||||||
with:
|
with:
|
||||||
path: compilers
|
path: compilers
|
||||||
key: ${{ runner.os }}-${{ hashFiles('ci-prepare-bionic.sh') }}
|
key: ${{ runner.os }}-${{ hashFiles('./ci-prepare-bionic.sh') }}
|
||||||
|
|
||||||
- name: Prepare compilers
|
- name: Prepare compilers
|
||||||
if: steps.prepare-compilers.outputs.cache-hit != 'true'
|
if: steps.prepare-compilers.outputs.cache-hit != 'true'
|
||||||
|
|||||||
2
.gitignore
vendored
2
.gitignore
vendored
@ -28,4 +28,4 @@ cmake-build-*/
|
|||||||
CMakeFiles/
|
CMakeFiles/
|
||||||
.idea/
|
.idea/
|
||||||
.vscode/
|
.vscode/
|
||||||
.directory
|
.directory
|
||||||
|
|||||||
117
CL/cl_d3d10.h
117
CL/cl_d3d10.h
@ -1,117 +0,0 @@
|
|||||||
/*******************************************************************************
|
|
||||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
|
|
||||||
#ifndef __OPENCL_CL_D3D10_H
|
|
||||||
#define __OPENCL_CL_D3D10_H
|
|
||||||
|
|
||||||
#include <d3d10.h>
|
|
||||||
#include <CL/cl.h>
|
|
||||||
#include <CL/cl_platform.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/******************************************************************************
|
|
||||||
* cl_khr_d3d10_sharing */
|
|
||||||
#define cl_khr_d3d10_sharing 1
|
|
||||||
|
|
||||||
typedef cl_uint cl_d3d10_device_source_khr;
|
|
||||||
typedef cl_uint cl_d3d10_device_set_khr;
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
/* Error Codes */
|
|
||||||
#define CL_INVALID_D3D10_DEVICE_KHR -1002
|
|
||||||
#define CL_INVALID_D3D10_RESOURCE_KHR -1003
|
|
||||||
#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
|
|
||||||
#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
|
|
||||||
|
|
||||||
/* cl_d3d10_device_source_khr */
|
|
||||||
#define CL_D3D10_DEVICE_KHR 0x4010
|
|
||||||
#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
|
|
||||||
|
|
||||||
/* cl_d3d10_device_set_khr */
|
|
||||||
#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
|
|
||||||
#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
|
|
||||||
|
|
||||||
/* cl_context_info */
|
|
||||||
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
|
|
||||||
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
|
|
||||||
|
|
||||||
/* cl_mem_info */
|
|
||||||
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
|
|
||||||
|
|
||||||
/* cl_image_info */
|
|
||||||
#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
|
|
||||||
|
|
||||||
/* cl_command_type */
|
|
||||||
#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
|
|
||||||
#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
|
|
||||||
cl_platform_id platform,
|
|
||||||
cl_d3d10_device_source_khr d3d_device_source,
|
|
||||||
void * d3d_object,
|
|
||||||
cl_d3d10_device_set_khr d3d_device_set,
|
|
||||||
cl_uint num_entries,
|
|
||||||
cl_device_id * devices,
|
|
||||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
|
|
||||||
cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
ID3D10Buffer * resource,
|
|
||||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
|
|
||||||
cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
ID3D10Texture2D * resource,
|
|
||||||
UINT subresource,
|
|
||||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
|
|
||||||
cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
ID3D10Texture3D * resource,
|
|
||||||
UINT subresource,
|
|
||||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem * mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem * mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* __OPENCL_CL_D3D10_H */
|
|
||||||
|
|
||||||
117
CL/cl_d3d11.h
117
CL/cl_d3d11.h
@ -1,117 +0,0 @@
|
|||||||
/*******************************************************************************
|
|
||||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
|
|
||||||
#ifndef __OPENCL_CL_D3D11_H
|
|
||||||
#define __OPENCL_CL_D3D11_H
|
|
||||||
|
|
||||||
#include <d3d11.h>
|
|
||||||
#include <CL/cl.h>
|
|
||||||
#include <CL/cl_platform.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/******************************************************************************
|
|
||||||
* cl_khr_d3d11_sharing */
|
|
||||||
#define cl_khr_d3d11_sharing 1
|
|
||||||
|
|
||||||
typedef cl_uint cl_d3d11_device_source_khr;
|
|
||||||
typedef cl_uint cl_d3d11_device_set_khr;
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
/* Error Codes */
|
|
||||||
#define CL_INVALID_D3D11_DEVICE_KHR -1006
|
|
||||||
#define CL_INVALID_D3D11_RESOURCE_KHR -1007
|
|
||||||
#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008
|
|
||||||
#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009
|
|
||||||
|
|
||||||
/* cl_d3d11_device_source_khr */
|
|
||||||
#define CL_D3D11_DEVICE_KHR 0x4019
|
|
||||||
#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A
|
|
||||||
|
|
||||||
/* cl_d3d11_device_set_khr */
|
|
||||||
#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B
|
|
||||||
#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C
|
|
||||||
|
|
||||||
/* cl_context_info */
|
|
||||||
#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D
|
|
||||||
#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D
|
|
||||||
|
|
||||||
/* cl_mem_info */
|
|
||||||
#define CL_MEM_D3D11_RESOURCE_KHR 0x401E
|
|
||||||
|
|
||||||
/* cl_image_info */
|
|
||||||
#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F
|
|
||||||
|
|
||||||
/* cl_command_type */
|
|
||||||
#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020
|
|
||||||
#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)(
|
|
||||||
cl_platform_id platform,
|
|
||||||
cl_d3d11_device_source_khr d3d_device_source,
|
|
||||||
void * d3d_object,
|
|
||||||
cl_d3d11_device_set_khr d3d_device_set,
|
|
||||||
cl_uint num_entries,
|
|
||||||
cl_device_id * devices,
|
|
||||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)(
|
|
||||||
cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
ID3D11Buffer * resource,
|
|
||||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)(
|
|
||||||
cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
ID3D11Texture2D * resource,
|
|
||||||
UINT subresource,
|
|
||||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)(
|
|
||||||
cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
ID3D11Texture3D * resource,
|
|
||||||
UINT subresource,
|
|
||||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem * mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem * mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* __OPENCL_CL_D3D11_H */
|
|
||||||
|
|
||||||
@ -1,118 +0,0 @@
|
|||||||
/*******************************************************************************
|
|
||||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
|
|
||||||
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H
|
|
||||||
#define __OPENCL_CL_DX9_MEDIA_SHARING_H
|
|
||||||
|
|
||||||
#include <CL/cl.h>
|
|
||||||
#include <CL/cl_platform.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
/* cl_khr_dx9_media_sharing */
|
|
||||||
#define cl_khr_dx9_media_sharing 1
|
|
||||||
|
|
||||||
typedef cl_uint cl_dx9_media_adapter_type_khr;
|
|
||||||
typedef cl_uint cl_dx9_media_adapter_set_khr;
|
|
||||||
|
|
||||||
#if defined(_WIN32)
|
|
||||||
#include <d3d9.h>
|
|
||||||
typedef struct _cl_dx9_surface_info_khr
|
|
||||||
{
|
|
||||||
IDirect3DSurface9 *resource;
|
|
||||||
HANDLE shared_handle;
|
|
||||||
} cl_dx9_surface_info_khr;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
/* Error Codes */
|
|
||||||
#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010
|
|
||||||
#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011
|
|
||||||
#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012
|
|
||||||
#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013
|
|
||||||
|
|
||||||
/* cl_dx9_media_adapter_type_khr */
|
|
||||||
#define CL_ADAPTER_D3D9_KHR 0x2020
|
|
||||||
#define CL_ADAPTER_D3D9EX_KHR 0x2021
|
|
||||||
#define CL_ADAPTER_DXVA_KHR 0x2022
|
|
||||||
|
|
||||||
/* cl_dx9_media_adapter_set_khr */
|
|
||||||
#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023
|
|
||||||
#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024
|
|
||||||
|
|
||||||
/* cl_context_info */
|
|
||||||
#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025
|
|
||||||
#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026
|
|
||||||
#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027
|
|
||||||
|
|
||||||
/* cl_mem_info */
|
|
||||||
#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028
|
|
||||||
#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029
|
|
||||||
|
|
||||||
/* cl_image_info */
|
|
||||||
#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A
|
|
||||||
|
|
||||||
/* cl_command_type */
|
|
||||||
#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B
|
|
||||||
#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
|
|
||||||
cl_platform_id platform,
|
|
||||||
cl_uint num_media_adapters,
|
|
||||||
cl_dx9_media_adapter_type_khr * media_adapter_type,
|
|
||||||
void * media_adapters,
|
|
||||||
cl_dx9_media_adapter_set_khr media_adapter_set,
|
|
||||||
cl_uint num_entries,
|
|
||||||
cl_device_id * devices,
|
|
||||||
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
|
|
||||||
cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
cl_dx9_media_adapter_type_khr adapter_type,
|
|
||||||
void * surface_info,
|
|
||||||
cl_uint plane,
|
|
||||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem * mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem * mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */
|
|
||||||
|
|
||||||
@ -1,170 +0,0 @@
|
|||||||
/*******************************************************************************
|
|
||||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
/*****************************************************************************\
|
|
||||||
|
|
||||||
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
|
|
||||||
|
|
||||||
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
|
||||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
|
||||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
|
|
||||||
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
File Name: cl_dx9_media_sharing_intel.h
|
|
||||||
|
|
||||||
Abstract:
|
|
||||||
|
|
||||||
Notes:
|
|
||||||
|
|
||||||
\*****************************************************************************/
|
|
||||||
|
|
||||||
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
|
|
||||||
#define __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
|
|
||||||
|
|
||||||
#include <CL/cl.h>
|
|
||||||
#include <CL/cl_platform.h>
|
|
||||||
#include <d3d9.h>
|
|
||||||
#include <dxvahd.h>
|
|
||||||
#include <wtypes.h>
|
|
||||||
#include <d3d9types.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/***************************************
|
|
||||||
* cl_intel_dx9_media_sharing extension *
|
|
||||||
****************************************/
|
|
||||||
|
|
||||||
#define cl_intel_dx9_media_sharing 1
|
|
||||||
|
|
||||||
typedef cl_uint cl_dx9_device_source_intel;
|
|
||||||
typedef cl_uint cl_dx9_device_set_intel;
|
|
||||||
|
|
||||||
/* error codes */
|
|
||||||
#define CL_INVALID_DX9_DEVICE_INTEL -1010
|
|
||||||
#define CL_INVALID_DX9_RESOURCE_INTEL -1011
|
|
||||||
#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012
|
|
||||||
#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013
|
|
||||||
|
|
||||||
/* cl_dx9_device_source_intel */
|
|
||||||
#define CL_D3D9_DEVICE_INTEL 0x4022
|
|
||||||
#define CL_D3D9EX_DEVICE_INTEL 0x4070
|
|
||||||
#define CL_DXVA_DEVICE_INTEL 0x4071
|
|
||||||
|
|
||||||
/* cl_dx9_device_set_intel */
|
|
||||||
#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024
|
|
||||||
#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025
|
|
||||||
|
|
||||||
/* cl_context_info */
|
|
||||||
#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026
|
|
||||||
#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072
|
|
||||||
#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073
|
|
||||||
|
|
||||||
/* cl_mem_info */
|
|
||||||
#define CL_MEM_DX9_RESOURCE_INTEL 0x4027
|
|
||||||
#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074
|
|
||||||
|
|
||||||
/* cl_image_info */
|
|
||||||
#define CL_IMAGE_DX9_PLANE_INTEL 0x4075
|
|
||||||
|
|
||||||
/* cl_command_type */
|
|
||||||
#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A
|
|
||||||
#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clGetDeviceIDsFromDX9INTEL(
|
|
||||||
cl_platform_id platform,
|
|
||||||
cl_dx9_device_source_intel dx9_device_source,
|
|
||||||
void* dx9_object,
|
|
||||||
cl_dx9_device_set_intel dx9_device_set,
|
|
||||||
cl_uint num_entries,
|
|
||||||
cl_device_id* devices,
|
|
||||||
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)(
|
|
||||||
cl_platform_id platform,
|
|
||||||
cl_dx9_device_source_intel dx9_device_source,
|
|
||||||
void* dx9_object,
|
|
||||||
cl_dx9_device_set_intel dx9_device_set,
|
|
||||||
cl_uint num_entries,
|
|
||||||
cl_device_id* devices,
|
|
||||||
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
|
||||||
clCreateFromDX9MediaSurfaceINTEL(
|
|
||||||
cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
IDirect3DSurface9* resource,
|
|
||||||
HANDLE sharedHandle,
|
|
||||||
UINT plane,
|
|
||||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)(
|
|
||||||
cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
IDirect3DSurface9* resource,
|
|
||||||
HANDLE sharedHandle,
|
|
||||||
UINT plane,
|
|
||||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueAcquireDX9ObjectsINTEL(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem* mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem* mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueReleaseDX9ObjectsINTEL(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
cl_mem* mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
cl_mem* mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H */
|
|
||||||
|
|
||||||
120
CL/cl_egl.h
120
CL/cl_egl.h
@ -1,120 +0,0 @@
|
|||||||
/*******************************************************************************
|
|
||||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
|
|
||||||
#ifndef __OPENCL_CL_EGL_H
|
|
||||||
#define __OPENCL_CL_EGL_H
|
|
||||||
|
|
||||||
#include <CL/cl.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
|
|
||||||
#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F
|
|
||||||
#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D
|
|
||||||
#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E
|
|
||||||
|
|
||||||
/* Error type for clCreateFromEGLImageKHR */
|
|
||||||
#define CL_INVALID_EGL_OBJECT_KHR -1093
|
|
||||||
#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092
|
|
||||||
|
|
||||||
/* CLeglImageKHR is an opaque handle to an EGLImage */
|
|
||||||
typedef void* CLeglImageKHR;
|
|
||||||
|
|
||||||
/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
|
|
||||||
typedef void* CLeglDisplayKHR;
|
|
||||||
|
|
||||||
/* CLeglSyncKHR is an opaque handle to an EGLSync object */
|
|
||||||
typedef void* CLeglSyncKHR;
|
|
||||||
|
|
||||||
/* properties passed to clCreateFromEGLImageKHR */
|
|
||||||
typedef intptr_t cl_egl_image_properties_khr;
|
|
||||||
|
|
||||||
|
|
||||||
#define cl_khr_egl_image 1
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
|
||||||
clCreateFromEGLImageKHR(cl_context context,
|
|
||||||
CLeglDisplayKHR egldisplay,
|
|
||||||
CLeglImageKHR eglimage,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
const cl_egl_image_properties_khr * properties,
|
|
||||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
|
|
||||||
cl_context context,
|
|
||||||
CLeglDisplayKHR egldisplay,
|
|
||||||
CLeglImageKHR eglimage,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
const cl_egl_image_properties_khr * properties,
|
|
||||||
cl_int * errcode_ret);
|
|
||||||
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem * mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem * mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event);
|
|
||||||
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem * mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem * mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event);
|
|
||||||
|
|
||||||
|
|
||||||
#define cl_khr_egl_event 1
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
|
||||||
clCreateEventFromEGLSyncKHR(cl_context context,
|
|
||||||
CLeglSyncKHR sync,
|
|
||||||
CLeglDisplayKHR display,
|
|
||||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
|
|
||||||
cl_context context,
|
|
||||||
CLeglSyncKHR sync,
|
|
||||||
CLeglDisplayKHR display,
|
|
||||||
cl_int * errcode_ret);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* __OPENCL_CL_EGL_H */
|
|
||||||
841
CL/cl_ext.h
841
CL/cl_ext.h
@ -1,841 +0,0 @@
|
|||||||
/*******************************************************************************
|
|
||||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
|
|
||||||
/* cl_ext.h contains OpenCL extensions which don't have external */
|
|
||||||
/* (OpenGL, D3D) dependencies. */
|
|
||||||
|
|
||||||
#ifndef __CL_EXT_H
|
|
||||||
#define __CL_EXT_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <CL/cl.h>
|
|
||||||
|
|
||||||
/* cl_khr_fp64 extension - no extension #define since it has no functions */
|
|
||||||
/* CL_DEVICE_DOUBLE_FP_CONFIG is defined in cl.h when CL_TARGET_OPENCL_VERSION >= 120 */
|
|
||||||
|
|
||||||
#if CL_TARGET_OPENCL_VERSION <= 110
|
|
||||||
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* cl_khr_fp16 extension - no extension #define since it has no functions */
|
|
||||||
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
|
|
||||||
|
|
||||||
/* Memory object destruction
|
|
||||||
*
|
|
||||||
* Apple extension for managing externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
|
|
||||||
*
|
|
||||||
* Registers a user callback function that will be called when the memory object is deleted and its resources
|
|
||||||
* freed. Each call to clSetMemObjectDestructorAPPLE registers the specified user callback function on a callback
|
|
||||||
* stack associated with memobj. The registered user callback functions are called in the reverse order in
|
|
||||||
* which they were registered. The user callback functions are called and then the memory object is deleted
|
|
||||||
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
|
|
||||||
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
|
|
||||||
* the storage bits for the memory object, can be reused or freed.
|
|
||||||
*
|
|
||||||
* The application may not call CL APIs with the cl_mem object passed to pfn_notify.
|
|
||||||
*
|
|
||||||
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
|
|
||||||
* before using.
|
|
||||||
*/
|
|
||||||
#define cl_APPLE_SetMemObjectDestructor 1
|
|
||||||
/* Entry point of the cl_APPLE_SetMemObjectDestructor extension: takes the
 * memory object, a callback of type void (*)(cl_mem, void *), and an opaque
 * user_data pointer. */
cl_int CL_API_ENTRY
clSetMemObjectDestructorAPPLE(
    cl_mem  memobj,
    void  (*pfn_notify)(cl_mem memobj, void *user_data),
    void   *user_data) CL_EXT_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
|
|
||||||
/* Context Logging Functions
|
|
||||||
*
|
|
||||||
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
|
|
||||||
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
|
|
||||||
* before using.
|
|
||||||
*
|
|
||||||
* clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger
|
|
||||||
*/
|
|
||||||
#define cl_APPLE_ContextLoggingFunctions 1
|
|
||||||
/* Context-logging helper that forwards all log messages to the Apple System
 * Logger; meant to be passed as pfn_notify to clCreateContext(). */
extern void CL_API_ENTRY
clLogMessagesToSystemLogAPPLE(
    const char *errstr,
    const void *private_info,
    size_t      cb,
    void       *user_data) CL_EXT_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
|
|
||||||
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * errstr,
|
|
||||||
const void * private_info,
|
|
||||||
size_t cb,
|
|
||||||
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
|
|
||||||
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * errstr,
|
|
||||||
const void * private_info,
|
|
||||||
size_t cb,
|
|
||||||
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
|
|
||||||
/************************
|
|
||||||
* cl_khr_icd extension *
|
|
||||||
************************/
|
|
||||||
#define cl_khr_icd 1
|
|
||||||
|
|
||||||
/* cl_platform_info */
|
|
||||||
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
|
|
||||||
|
|
||||||
/* Additional Error Codes */
|
|
||||||
#define CL_PLATFORM_NOT_FOUND_KHR -1001
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clIcdGetPlatformIDsKHR(cl_uint num_entries,
|
|
||||||
cl_platform_id * platforms,
|
|
||||||
cl_uint * num_platforms);
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int
|
|
||||||
(CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(cl_uint num_entries,
|
|
||||||
cl_platform_id * platforms,
|
|
||||||
cl_uint * num_platforms);
|
|
||||||
|
|
||||||
|
|
||||||
/*******************************
|
|
||||||
* cl_khr_il_program extension *
|
|
||||||
*******************************/
|
|
||||||
#define cl_khr_il_program 1
|
|
||||||
|
|
||||||
/* New property to clGetDeviceInfo for retrieving supported intermediate
|
|
||||||
* languages
|
|
||||||
*/
|
|
||||||
#define CL_DEVICE_IL_VERSION_KHR 0x105B
|
|
||||||
|
|
||||||
/* New property to clGetProgramInfo for retrieving the IL of a
|
|
||||||
* program
|
|
||||||
*/
|
|
||||||
#define CL_PROGRAM_IL_KHR 0x1169
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_program CL_API_CALL
|
|
||||||
clCreateProgramWithILKHR(cl_context context,
|
|
||||||
const void * il,
|
|
||||||
size_t length,
|
|
||||||
cl_int * errcode_ret);
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_program
|
|
||||||
(CL_API_CALL *clCreateProgramWithILKHR_fn)(cl_context context,
|
|
||||||
const void * il,
|
|
||||||
size_t length,
|
|
||||||
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
/* Extension: cl_khr_image2d_from_buffer
|
|
||||||
*
|
|
||||||
* This extension allows a 2D image to be created from a cl_mem buffer without
|
|
||||||
* a copy. The type associated with a 2D image created from a buffer in an
|
|
||||||
* OpenCL program is image2d_t. Both the sampler and sampler-less read_image
|
|
||||||
* built-in functions are supported for 2D images and 2D images created from
|
|
||||||
* a buffer. Similarly, the write_image built-ins are also supported for 2D
|
|
||||||
* images created from a buffer.
|
|
||||||
*
|
|
||||||
* When the 2D image from buffer is created, the client must specify the
|
|
||||||
* width, height, image format (i.e. channel order and channel data type)
|
|
||||||
* and optionally the row pitch.
|
|
||||||
*
|
|
||||||
* The pitch specified must be a multiple of
|
|
||||||
* CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR pixels.
|
|
||||||
* The base address of the buffer must be aligned to
|
|
||||||
* CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR pixels.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR 0x104A
|
|
||||||
#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR 0x104B
|
|
||||||
|
|
||||||
|
|
||||||
/**************************************
|
|
||||||
* cl_khr_initialize_memory extension *
|
|
||||||
**************************************/
|
|
||||||
|
|
||||||
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030
|
|
||||||
|
|
||||||
|
|
||||||
/**************************************
|
|
||||||
* cl_khr_terminate_context extension *
|
|
||||||
**************************************/
|
|
||||||
|
|
||||||
#define CL_CONTEXT_TERMINATED_KHR -1121
|
|
||||||
|
|
||||||
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031
|
|
||||||
#define CL_CONTEXT_TERMINATE_KHR 0x2032
|
|
||||||
|
|
||||||
#define cl_khr_terminate_context 1
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clTerminateContextKHR(cl_context context) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int
|
|
||||||
(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Extension: cl_khr_spir
|
|
||||||
*
|
|
||||||
* This extension adds support to create an OpenCL program object from a
|
|
||||||
* Standard Portable Intermediate Representation (SPIR) instance
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define CL_DEVICE_SPIR_VERSIONS 0x40E0
|
|
||||||
#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1
|
|
||||||
|
|
||||||
|
|
||||||
/*****************************************
|
|
||||||
* cl_khr_create_command_queue extension *
|
|
||||||
*****************************************/
|
|
||||||
#define cl_khr_create_command_queue 1
|
|
||||||
|
|
||||||
typedef cl_bitfield cl_queue_properties_khr;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_command_queue CL_API_CALL
|
|
||||||
clCreateCommandQueueWithPropertiesKHR(cl_context context,
|
|
||||||
cl_device_id device,
|
|
||||||
const cl_queue_properties_khr* properties,
|
|
||||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_command_queue
|
|
||||||
(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(cl_context context,
|
|
||||||
cl_device_id device,
|
|
||||||
const cl_queue_properties_khr* properties,
|
|
||||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
|
|
||||||
/******************************************
|
|
||||||
* cl_nv_device_attribute_query extension *
|
|
||||||
******************************************/
|
|
||||||
|
|
||||||
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
|
|
||||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
|
|
||||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
|
|
||||||
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
|
|
||||||
#define CL_DEVICE_WARP_SIZE_NV 0x4003
|
|
||||||
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
|
|
||||||
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
|
|
||||||
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
|
|
||||||
|
|
||||||
|
|
||||||
/*********************************
|
|
||||||
* cl_amd_device_attribute_query *
|
|
||||||
*********************************/
|
|
||||||
|
|
||||||
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
|
|
||||||
#define CL_DEVICE_TOPOLOGY_AMD 0x4037
|
|
||||||
#define CL_DEVICE_BOARD_NAME_AMD 0x4038
|
|
||||||
#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039
|
|
||||||
#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040
|
|
||||||
#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041
|
|
||||||
#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042
|
|
||||||
#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043
|
|
||||||
#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044
|
|
||||||
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045
|
|
||||||
#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046
|
|
||||||
#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047
|
|
||||||
#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048
|
|
||||||
#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049
|
|
||||||
#define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A
|
|
||||||
#define CL_DEVICE_GFXIP_MINOR_AMD 0x404B
|
|
||||||
#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C
|
|
||||||
#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD 0x4030
|
|
||||||
#define CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD 0x4031
|
|
||||||
#define CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD 0x4033
|
|
||||||
#define CL_DEVICE_PCIE_ID_AMD 0x4034
|
|
||||||
|
|
||||||
|
|
||||||
/*********************************
|
|
||||||
* cl_arm_printf extension
|
|
||||||
*********************************/
|
|
||||||
|
|
||||||
#define CL_PRINTF_CALLBACK_ARM 0x40B0
|
|
||||||
#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1
|
|
||||||
|
|
||||||
|
|
||||||
/***********************************
|
|
||||||
* cl_ext_device_fission extension
|
|
||||||
***********************************/
|
|
||||||
#define cl_ext_device_fission 1
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clReleaseDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int
|
|
||||||
(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clRetainDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int
|
|
||||||
(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
|
|
||||||
|
|
||||||
typedef cl_ulong cl_device_partition_property_ext;
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clCreateSubDevicesEXT(cl_device_id in_device,
|
|
||||||
const cl_device_partition_property_ext * properties,
|
|
||||||
cl_uint num_entries,
|
|
||||||
cl_device_id * out_devices,
|
|
||||||
cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int
|
|
||||||
(CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id in_device,
|
|
||||||
const cl_device_partition_property_ext * properties,
|
|
||||||
cl_uint num_entries,
|
|
||||||
cl_device_id * out_devices,
|
|
||||||
cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1;
|
|
||||||
|
|
||||||
/* cl_device_partition_property_ext */
|
|
||||||
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
|
|
||||||
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
|
|
||||||
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
|
|
||||||
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
|
|
||||||
|
|
||||||
/* clGetDeviceInfo selectors */
|
|
||||||
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
|
|
||||||
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
|
|
||||||
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
|
|
||||||
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
|
|
||||||
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
|
|
||||||
|
|
||||||
/* error codes */
|
|
||||||
#define CL_DEVICE_PARTITION_FAILED_EXT -1057
|
|
||||||
#define CL_INVALID_PARTITION_COUNT_EXT -1058
|
|
||||||
#define CL_INVALID_PARTITION_NAME_EXT -1059
|
|
||||||
|
|
||||||
/* CL_AFFINITY_DOMAINs */
|
|
||||||
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
|
|
||||||
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
|
|
||||||
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
|
|
||||||
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
|
|
||||||
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
|
|
||||||
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
|
|
||||||
|
|
||||||
/* cl_device_partition_property_ext list terminators */
|
|
||||||
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
|
|
||||||
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
|
|
||||||
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
|
|
||||||
|
|
||||||
|
|
||||||
/***********************************
|
|
||||||
* cl_ext_migrate_memobject extension definitions
|
|
||||||
***********************************/
|
|
||||||
#define cl_ext_migrate_memobject 1
|
|
||||||
|
|
||||||
typedef cl_bitfield cl_mem_migration_flags_ext;
|
|
||||||
|
|
||||||
#define CL_MIGRATE_MEM_OBJECT_HOST_EXT 0x1
|
|
||||||
|
|
||||||
#define CL_COMMAND_MIGRATE_MEM_OBJECT_EXT 0x4040
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueMigrateMemObjectEXT(cl_command_queue command_queue,
|
|
||||||
cl_uint num_mem_objects,
|
|
||||||
const cl_mem * mem_objects,
|
|
||||||
cl_mem_migration_flags_ext flags,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event);
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int
|
|
||||||
(CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(cl_command_queue command_queue,
|
|
||||||
cl_uint num_mem_objects,
|
|
||||||
const cl_mem * mem_objects,
|
|
||||||
cl_mem_migration_flags_ext flags,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event);
|
|
||||||
|
|
||||||
|
|
||||||
/*********************************
|
|
||||||
* cl_qcom_ext_host_ptr extension
|
|
||||||
*********************************/
|
|
||||||
#define cl_qcom_ext_host_ptr 1
|
|
||||||
|
|
||||||
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)
|
|
||||||
|
|
||||||
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
|
|
||||||
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
|
|
||||||
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
|
|
||||||
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
|
|
||||||
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
|
|
||||||
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
|
|
||||||
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
|
|
||||||
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7
|
|
||||||
|
|
||||||
typedef cl_uint cl_image_pitch_info_qcom;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clGetDeviceImageInfoQCOM(cl_device_id device,
|
|
||||||
size_t image_width,
|
|
||||||
size_t image_height,
|
|
||||||
const cl_image_format *image_format,
|
|
||||||
cl_image_pitch_info_qcom param_name,
|
|
||||||
size_t param_value_size,
|
|
||||||
void *param_value,
|
|
||||||
size_t *param_value_size_ret);
|
|
||||||
|
|
||||||
typedef struct _cl_mem_ext_host_ptr
|
|
||||||
{
|
|
||||||
/* Type of external memory allocation. */
|
|
||||||
/* Legal values will be defined in layered extensions. */
|
|
||||||
cl_uint allocation_type;
|
|
||||||
|
|
||||||
/* Host cache policy for this external memory allocation. */
|
|
||||||
cl_uint host_cache_policy;
|
|
||||||
|
|
||||||
} cl_mem_ext_host_ptr;
|
|
||||||
|
|
||||||
|
|
||||||
/*******************************************
|
|
||||||
* cl_qcom_ext_host_ptr_iocoherent extension
|
|
||||||
********************************************/
|
|
||||||
|
|
||||||
/* Cache policy specifying io-coherence */
|
|
||||||
#define CL_MEM_HOST_IOCOHERENT_QCOM 0x40A9
|
|
||||||
|
|
||||||
|
|
||||||
/*********************************
|
|
||||||
* cl_qcom_ion_host_ptr extension
|
|
||||||
*********************************/
|
|
||||||
|
|
||||||
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8
|
|
||||||
|
|
||||||
typedef struct _cl_mem_ion_host_ptr
|
|
||||||
{
|
|
||||||
/* Type of external memory allocation. */
|
|
||||||
/* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
|
|
||||||
cl_mem_ext_host_ptr ext_host_ptr;
|
|
||||||
|
|
||||||
/* ION file descriptor */
|
|
||||||
int ion_filedesc;
|
|
||||||
|
|
||||||
/* Host pointer to the ION allocated memory */
|
|
||||||
void* ion_hostptr;
|
|
||||||
|
|
||||||
} cl_mem_ion_host_ptr;
|
|
||||||
|
|
||||||
|
|
||||||
/*********************************
|
|
||||||
* cl_qcom_android_native_buffer_host_ptr extension
|
|
||||||
*********************************/
|
|
||||||
|
|
||||||
#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6
|
|
||||||
|
|
||||||
typedef struct _cl_mem_android_native_buffer_host_ptr
|
|
||||||
{
|
|
||||||
/* Type of external memory allocation. */
|
|
||||||
/* Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers. */
|
|
||||||
cl_mem_ext_host_ptr ext_host_ptr;
|
|
||||||
|
|
||||||
/* Virtual pointer to the android native buffer */
|
|
||||||
void* anb_ptr;
|
|
||||||
|
|
||||||
} cl_mem_android_native_buffer_host_ptr;
|
|
||||||
|
|
||||||
|
|
||||||
/******************************************
|
|
||||||
* cl_img_yuv_image extension *
|
|
||||||
******************************************/
|
|
||||||
|
|
||||||
/* Image formats used in clCreateImage */
|
|
||||||
#define CL_NV21_IMG 0x40D0
|
|
||||||
#define CL_YV12_IMG 0x40D1
|
|
||||||
|
|
||||||
|
|
||||||
/******************************************
|
|
||||||
* cl_img_cached_allocations extension *
|
|
||||||
******************************************/
|
|
||||||
|
|
||||||
/* Flag values used by clCreateBuffer */
|
|
||||||
#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG (1 << 26)
|
|
||||||
#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG (1 << 27)
|
|
||||||
|
|
||||||
|
|
||||||
/******************************************
|
|
||||||
* cl_img_use_gralloc_ptr extension *
|
|
||||||
******************************************/
|
|
||||||
#define cl_img_use_gralloc_ptr 1
|
|
||||||
|
|
||||||
/* Flag values used by clCreateBuffer */
|
|
||||||
#define CL_MEM_USE_GRALLOC_PTR_IMG (1 << 28)
|
|
||||||
|
|
||||||
/* To be used by clGetEventInfo: */
|
|
||||||
#define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG 0x40D2
|
|
||||||
#define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG 0x40D3
|
|
||||||
|
|
||||||
/* Error code from clEnqueueReleaseGrallocObjectsIMG */
|
|
||||||
#define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG 0x40D4
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueAcquireGrallocObjectsIMG(cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem * mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueReleaseGrallocObjectsIMG(cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem * mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
|
|
||||||
/*********************************
|
|
||||||
* cl_khr_subgroups extension
|
|
||||||
*********************************/
|
|
||||||
#define cl_khr_subgroups 1
|
|
||||||
|
|
||||||
#if !defined(CL_VERSION_2_1)
|
|
||||||
/* For OpenCL 2.1 and newer, cl_kernel_sub_group_info is declared in CL.h.
|
|
||||||
In hindsight, there should have been a khr suffix on this type for
|
|
||||||
the extension, but keeping it un-suffixed to maintain backwards
|
|
||||||
compatibility. */
|
|
||||||
typedef cl_uint cl_kernel_sub_group_info;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* cl_kernel_sub_group_info */
|
|
||||||
#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033
|
|
||||||
#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clGetKernelSubGroupInfoKHR(cl_kernel in_kernel,
|
|
||||||
cl_device_id in_device,
|
|
||||||
cl_kernel_sub_group_info param_name,
|
|
||||||
size_t input_value_size,
|
|
||||||
const void * input_value,
|
|
||||||
size_t param_value_size,
|
|
||||||
void * param_value,
|
|
||||||
size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int
|
|
||||||
(CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel in_kernel,
|
|
||||||
cl_device_id in_device,
|
|
||||||
cl_kernel_sub_group_info param_name,
|
|
||||||
size_t input_value_size,
|
|
||||||
const void * input_value,
|
|
||||||
size_t param_value_size,
|
|
||||||
void * param_value,
|
|
||||||
size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
|
|
||||||
|
|
||||||
|
|
||||||
/*********************************
|
|
||||||
* cl_khr_mipmap_image extension
|
|
||||||
*********************************/
|
|
||||||
|
|
||||||
/* cl_sampler_properties */
|
|
||||||
#define CL_SAMPLER_MIP_FILTER_MODE_KHR 0x1155
|
|
||||||
#define CL_SAMPLER_LOD_MIN_KHR 0x1156
|
|
||||||
#define CL_SAMPLER_LOD_MAX_KHR 0x1157
|
|
||||||
|
|
||||||
|
|
||||||
/*********************************
|
|
||||||
* cl_khr_priority_hints extension
|
|
||||||
*********************************/
|
|
||||||
/* This extension define is for backwards compatibility.
|
|
||||||
It shouldn't be required since this extension has no new functions. */
|
|
||||||
#define cl_khr_priority_hints 1
|
|
||||||
|
|
||||||
typedef cl_uint cl_queue_priority_khr;
|
|
||||||
|
|
||||||
/* cl_command_queue_properties */
|
|
||||||
#define CL_QUEUE_PRIORITY_KHR 0x1096
|
|
||||||
|
|
||||||
/* cl_queue_priority_khr */
|
|
||||||
#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0)
|
|
||||||
#define CL_QUEUE_PRIORITY_MED_KHR (1<<1)
|
|
||||||
#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2)
|
|
||||||
|
|
||||||
|
|
||||||
/*********************************
|
|
||||||
* cl_khr_throttle_hints extension
|
|
||||||
*********************************/
|
|
||||||
/* This extension define is for backwards compatibility.
|
|
||||||
It shouldn't be required since this extension has no new functions. */
|
|
||||||
#define cl_khr_throttle_hints 1
|
|
||||||
|
|
||||||
typedef cl_uint cl_queue_throttle_khr;
|
|
||||||
|
|
||||||
/* cl_command_queue_properties */
|
|
||||||
#define CL_QUEUE_THROTTLE_KHR 0x1097
|
|
||||||
|
|
||||||
/* cl_queue_throttle_khr */
|
|
||||||
#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0)
|
|
||||||
#define CL_QUEUE_THROTTLE_MED_KHR (1<<1)
|
|
||||||
#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2)
|
|
||||||
|
|
||||||
|
|
||||||
/*********************************
|
|
||||||
* cl_khr_subgroup_named_barrier
|
|
||||||
*********************************/
|
|
||||||
/* This extension define is for backwards compatibility.
|
|
||||||
It shouldn't be required since this extension has no new functions. */
|
|
||||||
#define cl_khr_subgroup_named_barrier 1
|
|
||||||
|
|
||||||
/* cl_device_info */
|
|
||||||
#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR 0x2035
|
|
||||||
|
|
||||||
|
|
||||||
/*********************************
|
|
||||||
* cl_khr_extended_versioning
|
|
||||||
*********************************/
|
|
||||||
|
|
||||||
#define cl_khr_extended_versioning 1
|
|
||||||
|
|
||||||
/*
 * Packed version layout: 10 bits of major, then 10 bits of minor, then
 * 12 bits of patch (major occupies the most significant bits).
 */
#define CL_VERSION_MAJOR_BITS_KHR (10)
#define CL_VERSION_MINOR_BITS_KHR (10)
#define CL_VERSION_PATCH_BITS_KHR (12)

/*
 * The masks are built from an unsigned 1 so that every expression using
 * them is evaluated in unsigned arithmetic. With signed masks,
 * CL_MAKE_VERSION_KHR() left-shifted a signed int by 22 bits, which
 * overflows (undefined behaviour) for any major value >= 512.
 */
#define CL_VERSION_MAJOR_MASK_KHR ((1u << CL_VERSION_MAJOR_BITS_KHR) - 1u)
#define CL_VERSION_MINOR_MASK_KHR ((1u << CL_VERSION_MINOR_BITS_KHR) - 1u)
#define CL_VERSION_PATCH_MASK_KHR ((1u << CL_VERSION_PATCH_BITS_KHR) - 1u)

/* Field extractors for a packed version value. */
#define CL_VERSION_MAJOR_KHR(version) ((version) >> (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR))
#define CL_VERSION_MINOR_KHR(version) (((version) >> CL_VERSION_PATCH_BITS_KHR) & CL_VERSION_MINOR_MASK_KHR)
#define CL_VERSION_PATCH_KHR(version) ((version) & CL_VERSION_PATCH_MASK_KHR)

/* Packs (major, minor, patch) into one value; each field is truncated to
 * its bit width before being shifted into place. */
#define CL_MAKE_VERSION_KHR(major, minor, patch) \
    ((((major) & CL_VERSION_MAJOR_MASK_KHR) << (CL_VERSION_MINOR_BITS_KHR + CL_VERSION_PATCH_BITS_KHR)) | \
    (((minor) & CL_VERSION_MINOR_MASK_KHR) << CL_VERSION_PATCH_BITS_KHR) | \
    ((patch) & CL_VERSION_PATCH_MASK_KHR))
|
|
||||||
|
|
||||||
typedef cl_uint cl_version_khr;
|
|
||||||
|
|
||||||
#define CL_NAME_VERSION_MAX_NAME_SIZE_KHR 64
|
|
||||||
|
|
||||||
typedef struct _cl_name_version_khr
|
|
||||||
{
|
|
||||||
cl_version_khr version;
|
|
||||||
char name[CL_NAME_VERSION_MAX_NAME_SIZE_KHR];
|
|
||||||
} cl_name_version_khr;
|
|
||||||
|
|
||||||
/* cl_platform_info */
|
|
||||||
#define CL_PLATFORM_NUMERIC_VERSION_KHR 0x0906
|
|
||||||
#define CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR 0x0907
|
|
||||||
|
|
||||||
/* cl_device_info */
|
|
||||||
#define CL_DEVICE_NUMERIC_VERSION_KHR 0x105E
|
|
||||||
#define CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR 0x105F
|
|
||||||
#define CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR 0x1060
|
|
||||||
#define CL_DEVICE_ILS_WITH_VERSION_KHR 0x1061
|
|
||||||
#define CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR 0x1062
|
|
||||||
|
|
||||||
|
|
||||||
/*********************************
|
|
||||||
* cl_khr_device_uuid extension
|
|
||||||
*********************************/
|
|
||||||
#define cl_khr_device_uuid 1
|
|
||||||
|
|
||||||
#define CL_UUID_SIZE_KHR 16
|
|
||||||
#define CL_LUID_SIZE_KHR 8
|
|
||||||
|
|
||||||
#define CL_DEVICE_UUID_KHR 0x106A
|
|
||||||
#define CL_DRIVER_UUID_KHR 0x106B
|
|
||||||
#define CL_DEVICE_LUID_VALID_KHR 0x106C
|
|
||||||
#define CL_DEVICE_LUID_KHR 0x106D
|
|
||||||
#define CL_DEVICE_NODE_MASK_KHR 0x106E
|
|
||||||
|
|
||||||
|
|
||||||
/**********************************
|
|
||||||
* cl_arm_import_memory extension *
|
|
||||||
**********************************/
|
|
||||||
#define cl_arm_import_memory 1
|
|
||||||
|
|
||||||
typedef intptr_t cl_import_properties_arm;
|
|
||||||
|
|
||||||
/* Default and valid property names for cl_arm_import_memory */
|
|
||||||
#define CL_IMPORT_TYPE_ARM 0x40B2
|
|
||||||
|
|
||||||
/* Host process memory type default value for CL_IMPORT_TYPE_ARM property */
|
|
||||||
#define CL_IMPORT_TYPE_HOST_ARM 0x40B3
|
|
||||||
|
|
||||||
/* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
|
|
||||||
#define CL_IMPORT_TYPE_DMA_BUF_ARM 0x40B4
|
|
||||||
|
|
||||||
/* Protected memory property */
|
|
||||||
#define CL_IMPORT_TYPE_PROTECTED_ARM 0x40B5
|
|
||||||
|
|
||||||
/* Android hardware buffer type value for CL_IMPORT_TYPE_ARM property */
|
|
||||||
#define CL_IMPORT_TYPE_ANDROID_HARDWARE_BUFFER_ARM 0x41E2
|
|
||||||
|
|
||||||
/* Data consistency with host property */
|
|
||||||
#define CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM 0x41E3
|
|
||||||
|
|
||||||
/* Import memory size value to indicate a size for the whole buffer */
|
|
||||||
#define CL_IMPORT_MEMORY_WHOLE_ALLOCATION_ARM SIZE_MAX
|
|
||||||
|
|
||||||
/* This extension adds a new function that allows for direct memory import into
|
|
||||||
* OpenCL via the clImportMemoryARM function.
|
|
||||||
*
|
|
||||||
* Memory imported through this interface will be mapped into the device's page
|
|
||||||
* tables directly, providing zero copy access. It will never fall back to copy
|
|
||||||
* operations and aliased buffers.
|
|
||||||
*
|
|
||||||
* Types of memory supported for import are specified as additional extension
|
|
||||||
* strings.
|
|
||||||
*
|
|
||||||
* This extension produces cl_mem allocations which are compatible with all other
|
|
||||||
* users of cl_mem in the standard API.
|
|
||||||
*
|
|
||||||
* This extension maps pages with the same properties as the normal buffer creation
|
|
||||||
* function clCreateBuffer.
|
|
||||||
*/
|
|
||||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
|
||||||
clImportMemoryARM( cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
const cl_import_properties_arm *properties,
|
|
||||||
void *memory,
|
|
||||||
size_t size,
|
|
||||||
cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
|
|
||||||
/******************************************
|
|
||||||
* cl_arm_shared_virtual_memory extension *
|
|
||||||
******************************************/
|
|
||||||
#define cl_arm_shared_virtual_memory 1
|
|
||||||
|
|
||||||
/* Used by clGetDeviceInfo */
|
|
||||||
#define CL_DEVICE_SVM_CAPABILITIES_ARM 0x40B6
|
|
||||||
|
|
||||||
/* Used by clGetMemObjectInfo */
|
|
||||||
#define CL_MEM_USES_SVM_POINTER_ARM 0x40B7
|
|
||||||
|
|
||||||
/* Used by clSetKernelExecInfoARM: */
|
|
||||||
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM 0x40B8
|
|
||||||
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM 0x40B9
|
|
||||||
|
|
||||||
/* To be used by clGetEventInfo: */
|
|
||||||
#define CL_COMMAND_SVM_FREE_ARM 0x40BA
|
|
||||||
#define CL_COMMAND_SVM_MEMCPY_ARM 0x40BB
|
|
||||||
#define CL_COMMAND_SVM_MEMFILL_ARM 0x40BC
|
|
||||||
#define CL_COMMAND_SVM_MAP_ARM 0x40BD
|
|
||||||
#define CL_COMMAND_SVM_UNMAP_ARM 0x40BE
|
|
||||||
|
|
||||||
/* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. */
|
|
||||||
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM (1 << 0)
|
|
||||||
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM (1 << 1)
|
|
||||||
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM (1 << 2)
|
|
||||||
#define CL_DEVICE_SVM_ATOMICS_ARM (1 << 3)
|
|
||||||
|
|
||||||
/* Flag values used by clSVMAllocARM: */
|
|
||||||
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM (1 << 10)
|
|
||||||
#define CL_MEM_SVM_ATOMICS_ARM (1 << 11)
|
|
||||||
|
|
||||||
typedef cl_bitfield cl_svm_mem_flags_arm;
|
|
||||||
typedef cl_uint cl_kernel_exec_info_arm;
|
|
||||||
typedef cl_bitfield cl_device_svm_capabilities_arm;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY void * CL_API_CALL
|
|
||||||
clSVMAllocARM(cl_context context,
|
|
||||||
cl_svm_mem_flags_arm flags,
|
|
||||||
size_t size,
|
|
||||||
cl_uint alignment) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY void CL_API_CALL
|
|
||||||
clSVMFreeARM(cl_context context,
|
|
||||||
void * svm_pointer) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueSVMFreeARM(cl_command_queue command_queue,
|
|
||||||
cl_uint num_svm_pointers,
|
|
||||||
void * svm_pointers[],
|
|
||||||
void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue,
|
|
||||||
cl_uint num_svm_pointers,
|
|
||||||
void * svm_pointers[],
|
|
||||||
void * user_data),
|
|
||||||
void * user_data,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueSVMMemcpyARM(cl_command_queue command_queue,
|
|
||||||
cl_bool blocking_copy,
|
|
||||||
void * dst_ptr,
|
|
||||||
const void * src_ptr,
|
|
||||||
size_t size,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueSVMMemFillARM(cl_command_queue command_queue,
|
|
||||||
void * svm_ptr,
|
|
||||||
const void * pattern,
|
|
||||||
size_t pattern_size,
|
|
||||||
size_t size,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueSVMMapARM(cl_command_queue command_queue,
|
|
||||||
cl_bool blocking_map,
|
|
||||||
cl_map_flags flags,
|
|
||||||
void * svm_ptr,
|
|
||||||
size_t size,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueSVMUnmapARM(cl_command_queue command_queue,
|
|
||||||
void * svm_ptr,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clSetKernelArgSVMPointerARM(cl_kernel kernel,
|
|
||||||
cl_uint arg_index,
|
|
||||||
const void * arg_value) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clSetKernelExecInfoARM(cl_kernel kernel,
|
|
||||||
cl_kernel_exec_info_arm param_name,
|
|
||||||
size_t param_value_size,
|
|
||||||
const void * param_value) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
/********************************
|
|
||||||
* cl_arm_get_core_id extension *
|
|
||||||
********************************/
|
|
||||||
|
|
||||||
#ifdef CL_VERSION_1_2
|
|
||||||
|
|
||||||
#define cl_arm_get_core_id 1
|
|
||||||
|
|
||||||
/* Device info property for bitfield of cores present */
|
|
||||||
#define CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM 0x40BF
|
|
||||||
|
|
||||||
#endif /* CL_VERSION_1_2 */
|
|
||||||
|
|
||||||
/*********************************
|
|
||||||
* cl_arm_job_slot_selection
|
|
||||||
*********************************/
|
|
||||||
|
|
||||||
#define cl_arm_job_slot_selection 1
|
|
||||||
|
|
||||||
/* cl_device_info */
|
|
||||||
#define CL_DEVICE_JOB_SLOTS_ARM 0x41E0
|
|
||||||
|
|
||||||
/* cl_command_queue_properties */
|
|
||||||
#define CL_QUEUE_JOB_SLOT_ARM 0x41E1
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* __CL_EXT_H */
|
|
||||||
@ -1,682 +0,0 @@
|
|||||||
/*******************************************************************************
|
|
||||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*
|
|
||||||
******************************************************************************/
|
|
||||||
/*****************************************************************************\
|
|
||||||
|
|
||||||
Copyright (c) 2013-2020 Intel Corporation All Rights Reserved.
|
|
||||||
|
|
||||||
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
|
||||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
|
||||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
|
|
||||||
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
File Name: cl_ext_intel.h
|
|
||||||
|
|
||||||
Abstract:
|
|
||||||
|
|
||||||
Notes:
|
|
||||||
|
|
||||||
\*****************************************************************************/
|
|
||||||
|
|
||||||
#ifndef __CL_EXT_INTEL_H
|
|
||||||
#define __CL_EXT_INTEL_H
|
|
||||||
|
|
||||||
#include <CL/cl.h>
|
|
||||||
#include <CL/cl_platform.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/***************************************
|
|
||||||
* cl_intel_thread_local_exec extension *
|
|
||||||
****************************************/
|
|
||||||
|
|
||||||
#define cl_intel_thread_local_exec 1
|
|
||||||
|
|
||||||
#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31)
|
|
||||||
|
|
||||||
/***********************************************
|
|
||||||
* cl_intel_device_partition_by_names extension *
|
|
||||||
************************************************/
|
|
||||||
|
|
||||||
#define cl_intel_device_partition_by_names 1
|
|
||||||
|
|
||||||
#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052
|
|
||||||
#define CL_PARTITION_BY_NAMES_LIST_END_INTEL -1
|
|
||||||
|
|
||||||
/************************************************
|
|
||||||
* cl_intel_accelerator extension *
|
|
||||||
* cl_intel_motion_estimation extension *
|
|
||||||
* cl_intel_advanced_motion_estimation extension *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
#define cl_intel_accelerator 1
|
|
||||||
#define cl_intel_motion_estimation 1
|
|
||||||
#define cl_intel_advanced_motion_estimation 1
|
|
||||||
|
|
||||||
typedef struct _cl_accelerator_intel* cl_accelerator_intel;
|
|
||||||
typedef cl_uint cl_accelerator_type_intel;
|
|
||||||
typedef cl_uint cl_accelerator_info_intel;
|
|
||||||
|
|
||||||
typedef struct _cl_motion_estimation_desc_intel {
|
|
||||||
cl_uint mb_block_type;
|
|
||||||
cl_uint subpixel_mode;
|
|
||||||
cl_uint sad_adjust_mode;
|
|
||||||
cl_uint search_path_type;
|
|
||||||
} cl_motion_estimation_desc_intel;
|
|
||||||
|
|
||||||
/* error codes */
|
|
||||||
#define CL_INVALID_ACCELERATOR_INTEL -1094
|
|
||||||
#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095
|
|
||||||
#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096
|
|
||||||
#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097
|
|
||||||
|
|
||||||
/* cl_accelerator_type_intel */
|
|
||||||
#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0
|
|
||||||
|
|
||||||
/* cl_accelerator_info_intel */
|
|
||||||
#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090
|
|
||||||
#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091
|
|
||||||
#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092
|
|
||||||
#define CL_ACCELERATOR_TYPE_INTEL 0x4093
|
|
||||||
|
|
||||||
/* cl_motion_detect_desc_intel flags */
|
|
||||||
#define CL_ME_MB_TYPE_16x16_INTEL 0x0
|
|
||||||
#define CL_ME_MB_TYPE_8x8_INTEL 0x1
|
|
||||||
#define CL_ME_MB_TYPE_4x4_INTEL 0x2
|
|
||||||
|
|
||||||
#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
|
|
||||||
#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
|
|
||||||
#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2
|
|
||||||
|
|
||||||
#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
|
|
||||||
#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1
|
|
||||||
|
|
||||||
#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0
|
|
||||||
#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1
|
|
||||||
#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5
|
|
||||||
|
|
||||||
#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL 0x0
|
|
||||||
#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1
|
|
||||||
#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2
|
|
||||||
#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL 0x4
|
|
||||||
|
|
||||||
#define CL_ME_FORWARD_INPUT_MODE_INTEL 0x1
|
|
||||||
#define CL_ME_BACKWARD_INPUT_MODE_INTEL 0x2
|
|
||||||
#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL 0x3
|
|
||||||
|
|
||||||
#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL 16
|
|
||||||
#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL 21
|
|
||||||
#define CL_ME_BIDIR_WEIGHT_HALF_INTEL 32
|
|
||||||
#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 43
|
|
||||||
#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 48
|
|
||||||
|
|
||||||
#define CL_ME_COST_PENALTY_NONE_INTEL 0x0
|
|
||||||
#define CL_ME_COST_PENALTY_LOW_INTEL 0x1
|
|
||||||
#define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2
|
|
||||||
#define CL_ME_COST_PENALTY_HIGH_INTEL 0x3
|
|
||||||
|
|
||||||
#define CL_ME_COST_PRECISION_QPEL_INTEL 0x0
|
|
||||||
#define CL_ME_COST_PRECISION_HPEL_INTEL 0x1
|
|
||||||
#define CL_ME_COST_PRECISION_PEL_INTEL 0x2
|
|
||||||
#define CL_ME_COST_PRECISION_DPEL_INTEL 0x3
|
|
||||||
|
|
||||||
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
|
|
||||||
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
|
||||||
#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
|
|
||||||
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
|
|
||||||
|
|
||||||
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
|
|
||||||
#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
|
|
||||||
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
|
|
||||||
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
|
|
||||||
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
|
|
||||||
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
|
|
||||||
|
|
||||||
#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
|
|
||||||
#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
|
||||||
#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
|
|
||||||
#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
|
|
||||||
|
|
||||||
/* cl_device_info */
|
|
||||||
#define CL_DEVICE_ME_VERSION_INTEL 0x407E
|
|
||||||
|
|
||||||
#define CL_ME_VERSION_LEGACY_INTEL 0x0
|
|
||||||
#define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1
|
|
||||||
#define CL_ME_VERSION_ADVANCED_VER_2_INTEL 0x2
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL
|
|
||||||
clCreateAcceleratorINTEL(
|
|
||||||
cl_context context,
|
|
||||||
cl_accelerator_type_intel accelerator_type,
|
|
||||||
size_t descriptor_size,
|
|
||||||
const void* descriptor,
|
|
||||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)(
|
|
||||||
cl_context context,
|
|
||||||
cl_accelerator_type_intel accelerator_type,
|
|
||||||
size_t descriptor_size,
|
|
||||||
const void* descriptor,
|
|
||||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clGetAcceleratorInfoINTEL(
|
|
||||||
cl_accelerator_intel accelerator,
|
|
||||||
cl_accelerator_info_intel param_name,
|
|
||||||
size_t param_value_size,
|
|
||||||
void* param_value,
|
|
||||||
size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)(
|
|
||||||
cl_accelerator_intel accelerator,
|
|
||||||
cl_accelerator_info_intel param_name,
|
|
||||||
size_t param_value_size,
|
|
||||||
void* param_value,
|
|
||||||
size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clRetainAcceleratorINTEL(
|
|
||||||
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)(
|
|
||||||
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clReleaseAcceleratorINTEL(
|
|
||||||
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)(
|
|
||||||
cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
/******************************************
|
|
||||||
* cl_intel_simultaneous_sharing extension *
|
|
||||||
*******************************************/
|
|
||||||
|
|
||||||
#define cl_intel_simultaneous_sharing 1
|
|
||||||
|
|
||||||
#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104
|
|
||||||
#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105
|
|
||||||
|
|
||||||
/***********************************
|
|
||||||
* cl_intel_egl_image_yuv extension *
|
|
||||||
************************************/
|
|
||||||
|
|
||||||
#define cl_intel_egl_image_yuv 1
|
|
||||||
|
|
||||||
#define CL_EGL_YUV_PLANE_INTEL 0x4107
|
|
||||||
|
|
||||||
/********************************
|
|
||||||
* cl_intel_packed_yuv extension *
|
|
||||||
*********************************/
|
|
||||||
|
|
||||||
#define cl_intel_packed_yuv 1
|
|
||||||
|
|
||||||
#define CL_YUYV_INTEL 0x4076
|
|
||||||
#define CL_UYVY_INTEL 0x4077
|
|
||||||
#define CL_YVYU_INTEL 0x4078
|
|
||||||
#define CL_VYUY_INTEL 0x4079
|
|
||||||
|
|
||||||
/********************************************
|
|
||||||
* cl_intel_required_subgroup_size extension *
|
|
||||||
*********************************************/
|
|
||||||
|
|
||||||
#define cl_intel_required_subgroup_size 1
|
|
||||||
|
|
||||||
#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108
|
|
||||||
#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109
|
|
||||||
#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A
|
|
||||||
|
|
||||||
/****************************************
|
|
||||||
* cl_intel_driver_diagnostics extension *
|
|
||||||
*****************************************/
|
|
||||||
|
|
||||||
#define cl_intel_driver_diagnostics 1
|
|
||||||
|
|
||||||
typedef cl_uint cl_diagnostics_verbose_level;
|
|
||||||
|
|
||||||
#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106
|
|
||||||
|
|
||||||
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL ( 0xff )
|
|
||||||
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL ( 1 )
|
|
||||||
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL ( 1 << 1 )
|
|
||||||
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL ( 1 << 2 )
|
|
||||||
|
|
||||||
/********************************
|
|
||||||
* cl_intel_planar_yuv extension *
|
|
||||||
*********************************/
|
|
||||||
|
|
||||||
#define CL_NV12_INTEL 0x410E
|
|
||||||
|
|
||||||
#define CL_MEM_NO_ACCESS_INTEL ( 1 << 24 )
|
|
||||||
#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ( 1 << 25 )
|
|
||||||
|
|
||||||
#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E
|
|
||||||
#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F
|
|
||||||
|
|
||||||
/*******************************************************
|
|
||||||
* cl_intel_device_side_avc_motion_estimation extension *
|
|
||||||
********************************************************/
|
|
||||||
|
|
||||||
#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B
|
|
||||||
#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C
|
|
||||||
#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D
|
|
||||||
|
|
||||||
#define CL_AVC_ME_VERSION_0_INTEL 0x0 /* No support. */
|
|
||||||
#define CL_AVC_ME_VERSION_1_INTEL 0x1 /* First supported version. */
|
|
||||||
|
|
||||||
#define CL_AVC_ME_MAJOR_16x16_INTEL 0x0
|
|
||||||
#define CL_AVC_ME_MAJOR_16x8_INTEL 0x1
|
|
||||||
#define CL_AVC_ME_MAJOR_8x16_INTEL 0x2
|
|
||||||
#define CL_AVC_ME_MAJOR_8x8_INTEL 0x3
|
|
||||||
|
|
||||||
#define CL_AVC_ME_MINOR_8x8_INTEL 0x0
|
|
||||||
#define CL_AVC_ME_MINOR_8x4_INTEL 0x1
|
|
||||||
#define CL_AVC_ME_MINOR_4x8_INTEL 0x2
|
|
||||||
#define CL_AVC_ME_MINOR_4x4_INTEL 0x3
|
|
||||||
|
|
||||||
#define CL_AVC_ME_MAJOR_FORWARD_INTEL 0x0
|
|
||||||
#define CL_AVC_ME_MAJOR_BACKWARD_INTEL 0x1
|
|
||||||
#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2
|
|
||||||
|
|
||||||
#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0
|
|
||||||
#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E
|
|
||||||
#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D
|
|
||||||
#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B
|
|
||||||
#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77
|
|
||||||
#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F
|
|
||||||
#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F
|
|
||||||
#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F
|
|
||||||
|
|
||||||
#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0
|
|
||||||
#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1
|
|
||||||
#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2
|
|
||||||
#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3
|
|
||||||
#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4
|
|
||||||
#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5
|
|
||||||
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6
|
|
||||||
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7
|
|
||||||
#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8
|
|
||||||
#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL 0x9
|
|
||||||
#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL 0x2
|
|
||||||
#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL 0xa
|
|
||||||
|
|
||||||
#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
|
|
||||||
#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2
|
|
||||||
|
|
||||||
#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
|
|
||||||
#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
|
|
||||||
#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3
|
|
||||||
|
|
||||||
#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0
|
|
||||||
#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1
|
|
||||||
#define CL_AVC_ME_COST_PRECISION_PEL_INTEL 0x2
|
|
||||||
#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3
|
|
||||||
|
|
||||||
#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10
|
|
||||||
#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15
|
|
||||||
#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20
|
|
||||||
#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B
|
|
||||||
#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30
|
|
||||||
|
|
||||||
#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0
|
|
||||||
#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2
|
|
||||||
#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4
|
|
||||||
#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8
|
|
||||||
|
|
||||||
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0
|
|
||||||
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000
|
|
||||||
|
|
||||||
#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
|
|
||||||
#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
|
|
||||||
#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL ( 0x3 << 24 )
|
|
||||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL ( 0x55 << 24 )
|
|
||||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL ( 0xAA << 24 )
|
|
||||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL ( 0xFF << 24 )
|
|
||||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
|
|
||||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
|
|
||||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL ( 0x1 << 26 )
|
|
||||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL ( 0x2 << 26 )
|
|
||||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL ( 0x1 << 28 )
|
|
||||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL ( 0x2 << 28 )
|
|
||||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL ( 0x1 << 30 )
|
|
||||||
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL ( 0x2 << 30 )
|
|
||||||
|
|
||||||
#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00
|
|
||||||
#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80
|
|
||||||
|
|
||||||
#define CL_AVC_ME_INTRA_16x16_INTEL 0x0
|
|
||||||
#define CL_AVC_ME_INTRA_8x8_INTEL 0x1
|
|
||||||
#define CL_AVC_ME_INTRA_4x4_INTEL 0x2
|
|
||||||
|
|
||||||
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6
|
|
||||||
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5
|
|
||||||
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3
|
|
||||||
|
|
||||||
#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60
|
|
||||||
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10
|
|
||||||
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8
|
|
||||||
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4
|
|
||||||
|
|
||||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
|
|
||||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
|
||||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
|
|
||||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
|
|
||||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
|
|
||||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
|
|
||||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
|
|
||||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
|
|
||||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
|
|
||||||
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
|
|
||||||
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
|
|
||||||
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
|
|
||||||
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
|
|
||||||
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
|
|
||||||
|
|
||||||
#define CL_AVC_ME_FRAME_FORWARD_INTEL 0x1
|
|
||||||
#define CL_AVC_ME_FRAME_BACKWARD_INTEL 0x2
|
|
||||||
#define CL_AVC_ME_FRAME_DUAL_INTEL 0x3
|
|
||||||
|
|
||||||
#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0
|
|
||||||
#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1
|
|
||||||
#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2
|
|
||||||
|
|
||||||
#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0
|
|
||||||
#define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1
|
|
||||||
|
|
||||||
/*******************************************
|
|
||||||
* cl_intel_unified_shared_memory extension *
|
|
||||||
********************************************/
|
|
||||||
|
|
||||||
/* These APIs are in sync with Revision O of the cl_intel_unified_shared_memory spec! */
|
|
||||||
|
|
||||||
#define cl_intel_unified_shared_memory 1
|
|
||||||
|
|
||||||
/* cl_device_info */
|
|
||||||
#define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL 0x4190
|
|
||||||
#define CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL 0x4191
|
|
||||||
#define CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4192
|
|
||||||
#define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4193
|
|
||||||
#define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL 0x4194
|
|
||||||
|
|
||||||
typedef cl_bitfield cl_device_unified_shared_memory_capabilities_intel;
|
|
||||||
|
|
||||||
/* cl_device_unified_shared_memory_capabilities_intel - bitfield */
|
|
||||||
#define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL (1 << 0)
|
|
||||||
#define CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL (1 << 1)
|
|
||||||
#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL (1 << 2)
|
|
||||||
#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL (1 << 3)
|
|
||||||
|
|
||||||
typedef cl_bitfield cl_mem_properties_intel;
|
|
||||||
|
|
||||||
/* cl_mem_properties_intel */
|
|
||||||
#define CL_MEM_ALLOC_FLAGS_INTEL 0x4195
|
|
||||||
|
|
||||||
typedef cl_bitfield cl_mem_alloc_flags_intel;
|
|
||||||
|
|
||||||
/* cl_mem_alloc_flags_intel - bitfield */
|
|
||||||
#define CL_MEM_ALLOC_WRITE_COMBINED_INTEL (1 << 0)
|
|
||||||
|
|
||||||
typedef cl_uint cl_mem_info_intel;
|
|
||||||
|
|
||||||
/* cl_mem_alloc_info_intel */
|
|
||||||
#define CL_MEM_ALLOC_TYPE_INTEL 0x419A
|
|
||||||
#define CL_MEM_ALLOC_BASE_PTR_INTEL 0x419B
|
|
||||||
#define CL_MEM_ALLOC_SIZE_INTEL 0x419C
|
|
||||||
#define CL_MEM_ALLOC_DEVICE_INTEL 0x419D
|
|
||||||
/* Enum values 0x419E-0x419F are reserved for future queries. */
|
|
||||||
|
|
||||||
typedef cl_uint cl_unified_shared_memory_type_intel;
|
|
||||||
|
|
||||||
/* cl_unified_shared_memory_type_intel */
|
|
||||||
#define CL_MEM_TYPE_UNKNOWN_INTEL 0x4196
|
|
||||||
#define CL_MEM_TYPE_HOST_INTEL 0x4197
|
|
||||||
#define CL_MEM_TYPE_DEVICE_INTEL 0x4198
|
|
||||||
#define CL_MEM_TYPE_SHARED_INTEL 0x4199
|
|
||||||
|
|
||||||
typedef cl_uint cl_mem_advice_intel;
|
|
||||||
|
|
||||||
/* cl_mem_advice_intel */
|
|
||||||
/* Enum values 0x4208-0x420F are reserved for future memory advices. */
|
|
||||||
|
|
||||||
/* cl_kernel_exec_info */
|
|
||||||
#define CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL 0x4200
|
|
||||||
#define CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL 0x4201
|
|
||||||
#define CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL 0x4202
|
|
||||||
#define CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL 0x4203
|
|
||||||
|
|
||||||
/* cl_command_type */
|
|
||||||
#define CL_COMMAND_MEMFILL_INTEL 0x4204
|
|
||||||
#define CL_COMMAND_MEMCPY_INTEL 0x4205
|
|
||||||
#define CL_COMMAND_MIGRATEMEM_INTEL 0x4206
|
|
||||||
#define CL_COMMAND_MEMADVISE_INTEL 0x4207
|
|
||||||
|
|
||||||
extern CL_API_ENTRY void* CL_API_CALL
|
|
||||||
clHostMemAllocINTEL(
|
|
||||||
cl_context context,
|
|
||||||
const cl_mem_properties_intel* properties,
|
|
||||||
size_t size,
|
|
||||||
cl_uint alignment,
|
|
||||||
cl_int* errcode_ret);
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY void* (CL_API_CALL *
|
|
||||||
clHostMemAllocINTEL_fn)(
|
|
||||||
cl_context context,
|
|
||||||
const cl_mem_properties_intel* properties,
|
|
||||||
size_t size,
|
|
||||||
cl_uint alignment,
|
|
||||||
cl_int* errcode_ret);
|
|
||||||
|
|
||||||
extern CL_API_ENTRY void* CL_API_CALL
|
|
||||||
clDeviceMemAllocINTEL(
|
|
||||||
cl_context context,
|
|
||||||
cl_device_id device,
|
|
||||||
const cl_mem_properties_intel* properties,
|
|
||||||
size_t size,
|
|
||||||
cl_uint alignment,
|
|
||||||
cl_int* errcode_ret);
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY void* (CL_API_CALL *
|
|
||||||
clDeviceMemAllocINTEL_fn)(
|
|
||||||
cl_context context,
|
|
||||||
cl_device_id device,
|
|
||||||
const cl_mem_properties_intel* properties,
|
|
||||||
size_t size,
|
|
||||||
cl_uint alignment,
|
|
||||||
cl_int* errcode_ret);
|
|
||||||
|
|
||||||
extern CL_API_ENTRY void* CL_API_CALL
|
|
||||||
clSharedMemAllocINTEL(
|
|
||||||
cl_context context,
|
|
||||||
cl_device_id device,
|
|
||||||
const cl_mem_properties_intel* properties,
|
|
||||||
size_t size,
|
|
||||||
cl_uint alignment,
|
|
||||||
cl_int* errcode_ret);
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY void* (CL_API_CALL *
|
|
||||||
clSharedMemAllocINTEL_fn)(
|
|
||||||
cl_context context,
|
|
||||||
cl_device_id device,
|
|
||||||
const cl_mem_properties_intel* properties,
|
|
||||||
size_t size,
|
|
||||||
cl_uint alignment,
|
|
||||||
cl_int* errcode_ret);
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clMemFreeINTEL(
|
|
||||||
cl_context context,
|
|
||||||
void* ptr);
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *
|
|
||||||
clMemFreeINTEL_fn)(
|
|
||||||
cl_context context,
|
|
||||||
void* ptr);
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clGetMemAllocInfoINTEL(
|
|
||||||
cl_context context,
|
|
||||||
const void* ptr,
|
|
||||||
cl_mem_info_intel param_name,
|
|
||||||
size_t param_value_size,
|
|
||||||
void* param_value,
|
|
||||||
size_t* param_value_size_ret);
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *
|
|
||||||
clGetMemAllocInfoINTEL_fn)(
|
|
||||||
cl_context context,
|
|
||||||
const void* ptr,
|
|
||||||
cl_mem_info_intel param_name,
|
|
||||||
size_t param_value_size,
|
|
||||||
void* param_value,
|
|
||||||
size_t* param_value_size_ret);
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clSetKernelArgMemPointerINTEL(
|
|
||||||
cl_kernel kernel,
|
|
||||||
cl_uint arg_index,
|
|
||||||
const void* arg_value);
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *
|
|
||||||
clSetKernelArgMemPointerINTEL_fn)(
|
|
||||||
cl_kernel kernel,
|
|
||||||
cl_uint arg_index,
|
|
||||||
const void* arg_value);
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueMemsetINTEL( /* Deprecated */
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
void* dst_ptr,
|
|
||||||
cl_int value,
|
|
||||||
size_t size,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event);
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *
|
|
||||||
clEnqueueMemsetINTEL_fn)( /* Deprecated */
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
void* dst_ptr,
|
|
||||||
cl_int value,
|
|
||||||
size_t size,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event);
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueMemFillINTEL(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
void* dst_ptr,
|
|
||||||
const void* pattern,
|
|
||||||
size_t pattern_size,
|
|
||||||
size_t size,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event);
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *
|
|
||||||
clEnqueueMemFillINTEL_fn)(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
void* dst_ptr,
|
|
||||||
const void* pattern,
|
|
||||||
size_t pattern_size,
|
|
||||||
size_t size,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event);
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueMemcpyINTEL(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_bool blocking,
|
|
||||||
void* dst_ptr,
|
|
||||||
const void* src_ptr,
|
|
||||||
size_t size,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event);
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *
|
|
||||||
clEnqueueMemcpyINTEL_fn)(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_bool blocking,
|
|
||||||
void* dst_ptr,
|
|
||||||
const void* src_ptr,
|
|
||||||
size_t size,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event);
|
|
||||||
|
|
||||||
#ifdef CL_VERSION_1_2
|
|
||||||
|
|
||||||
/* Because these APIs use cl_mem_migration_flags, they require
|
|
||||||
OpenCL 1.2: */
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueMigrateMemINTEL(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
const void* ptr,
|
|
||||||
size_t size,
|
|
||||||
cl_mem_migration_flags flags,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event);
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *
|
|
||||||
clEnqueueMigrateMemINTEL_fn)(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
const void* ptr,
|
|
||||||
size_t size,
|
|
||||||
cl_mem_migration_flags flags,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueMemAdviseINTEL(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
const void* ptr,
|
|
||||||
size_t size,
|
|
||||||
cl_mem_advice_intel advice,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event);
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *
|
|
||||||
clEnqueueMemAdviseINTEL_fn)(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
const void* ptr,
|
|
||||||
size_t size,
|
|
||||||
cl_mem_advice_intel advice,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* __CL_EXT_INTEL_H */
|
|
||||||
159
CL/cl_gl.h
159
CL/cl_gl.h
@ -1,159 +0,0 @@
|
|||||||
/*******************************************************************************
|
|
||||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
|
|
||||||
#ifndef __OPENCL_CL_GL_H
|
|
||||||
#define __OPENCL_CL_GL_H
|
|
||||||
|
|
||||||
#include <CL/cl.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef cl_uint cl_gl_object_type;
|
|
||||||
typedef cl_uint cl_gl_texture_info;
|
|
||||||
typedef cl_uint cl_gl_platform_info;
|
|
||||||
typedef struct __GLsync *cl_GLsync;
|
|
||||||
|
|
||||||
/* cl_gl_object_type; enum values 0x2000 - 0x2012 are currently taken (0x200D by cl_gl_ext.h) */
|
|
||||||
#define CL_GL_OBJECT_BUFFER 0x2000
|
|
||||||
#define CL_GL_OBJECT_TEXTURE2D 0x2001
|
|
||||||
#define CL_GL_OBJECT_TEXTURE3D 0x2002
|
|
||||||
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
|
|
||||||
#ifdef CL_VERSION_1_2
|
|
||||||
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
|
|
||||||
#define CL_GL_OBJECT_TEXTURE1D 0x200F
|
|
||||||
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
|
|
||||||
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* cl_gl_texture_info */
|
|
||||||
#define CL_GL_TEXTURE_TARGET 0x2004
|
|
||||||
#define CL_GL_MIPMAP_LEVEL 0x2005
|
|
||||||
#ifdef CL_VERSION_1_2
|
|
||||||
#define CL_GL_NUM_SAMPLES 0x2012
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
|
||||||
clCreateFromGLBuffer(cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
cl_GLuint bufobj,
|
|
||||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
#ifdef CL_VERSION_1_2
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
|
||||||
clCreateFromGLTexture(cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
cl_GLenum target,
|
|
||||||
cl_GLint miplevel,
|
|
||||||
cl_GLuint texture,
|
|
||||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
|
||||||
clCreateFromGLRenderbuffer(cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
cl_GLuint renderbuffer,
|
|
||||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clGetGLObjectInfo(cl_mem memobj,
|
|
||||||
cl_gl_object_type * gl_object_type,
|
|
||||||
cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clGetGLTextureInfo(cl_mem memobj,
|
|
||||||
cl_gl_texture_info param_name,
|
|
||||||
size_t param_value_size,
|
|
||||||
void * param_value,
|
|
||||||
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueAcquireGLObjects(cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem * mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueReleaseGLObjects(cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem * mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event * event_wait_list,
|
|
||||||
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
|
|
||||||
/* Deprecated OpenCL 1.1 APIs */
|
|
||||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
|
||||||
clCreateFromGLTexture2D(cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
cl_GLenum target,
|
|
||||||
cl_GLint miplevel,
|
|
||||||
cl_GLuint texture,
|
|
||||||
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
|
|
||||||
clCreateFromGLTexture3D(cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
cl_GLenum target,
|
|
||||||
cl_GLint miplevel,
|
|
||||||
cl_GLuint texture,
|
|
||||||
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
|
|
||||||
|
|
||||||
/* cl_khr_gl_sharing extension */
|
|
||||||
|
|
||||||
#define cl_khr_gl_sharing 1
|
|
||||||
|
|
||||||
typedef cl_uint cl_gl_context_info;
|
|
||||||
|
|
||||||
/* Additional Error Codes */
|
|
||||||
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
|
|
||||||
|
|
||||||
/* cl_gl_context_info */
|
|
||||||
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
|
|
||||||
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
|
|
||||||
|
|
||||||
/* Additional cl_context_properties */
|
|
||||||
#define CL_GL_CONTEXT_KHR 0x2008
|
|
||||||
#define CL_EGL_DISPLAY_KHR 0x2009
|
|
||||||
#define CL_GLX_DISPLAY_KHR 0x200A
|
|
||||||
#define CL_WGL_HDC_KHR 0x200B
|
|
||||||
#define CL_CGL_SHAREGROUP_KHR 0x200C
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clGetGLContextInfoKHR(const cl_context_properties * properties,
|
|
||||||
cl_gl_context_info param_name,
|
|
||||||
size_t param_value_size,
|
|
||||||
void * param_value,
|
|
||||||
size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
|
|
||||||
const cl_context_properties * properties,
|
|
||||||
cl_gl_context_info param_name,
|
|
||||||
size_t param_value_size,
|
|
||||||
void * param_value,
|
|
||||||
size_t * param_value_size_ret);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* __OPENCL_CL_GL_H */
|
|
||||||
@ -1,40 +0,0 @@
|
|||||||
/*******************************************************************************
|
|
||||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
|
|
||||||
#ifndef __OPENCL_CL_GL_EXT_H
|
|
||||||
#define __OPENCL_CL_GL_EXT_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <CL/cl_gl.h>
|
|
||||||
|
|
||||||
/*
|
|
||||||
* cl_khr_gl_event extension
|
|
||||||
*/
|
|
||||||
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_event CL_API_CALL
|
|
||||||
clCreateEventFromGLsyncKHR(cl_context context,
|
|
||||||
cl_GLsync cl_GLsync,
|
|
||||||
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* __OPENCL_CL_GL_EXT_H */
|
|
||||||
440
CL/cl_half.h
440
CL/cl_half.h
@ -1,440 +0,0 @@
|
|||||||
/*******************************************************************************
|
|
||||||
* Copyright (c) 2019-2020 The Khronos Group Inc.
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This is a header-only utility library that provides OpenCL host code with
|
|
||||||
* routines for converting to/from cl_half values.
|
|
||||||
*
|
|
||||||
* Example usage:
|
|
||||||
*
|
|
||||||
* #include <CL/cl_half.h>
|
|
||||||
* ...
|
|
||||||
* cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE);
|
|
||||||
* cl_float f = cl_half_to_float(h);
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef OPENCL_CL_HALF_H
|
|
||||||
#define OPENCL_CL_HALF_H
|
|
||||||
|
|
||||||
#include <CL/cl_platform.h>
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Rounding direction applied when a wider floating-point value is narrowed
 * to cl_half.
 */
typedef enum
{
    CL_HALF_RTE = 0, /* round to nearest, ties to even */
    CL_HALF_RTZ = 1, /* round towards zero             */
    CL_HALF_RTP = 2, /* round towards +infinity        */
    CL_HALF_RTN = 3  /* round towards -infinity        */
} cl_half_rounding_mode;
|
|
||||||
|
|
||||||
|
|
||||||
/* Private utility macros. */
|
|
||||||
#define CL_HALF_EXP_MASK 0x7C00
|
|
||||||
#define CL_HALF_MAX_FINITE_MAG 0x7BFF
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Utility to deal with values that overflow when converting to half precision.
|
|
||||||
*/
|
|
||||||
static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode,
|
|
||||||
uint16_t sign)
|
|
||||||
{
|
|
||||||
if (rounding_mode == CL_HALF_RTZ)
|
|
||||||
{
|
|
||||||
// Round overflow towards zero -> largest finite number (preserving sign)
|
|
||||||
return (sign << 15) | CL_HALF_MAX_FINITE_MAG;
|
|
||||||
}
|
|
||||||
else if (rounding_mode == CL_HALF_RTP && sign)
|
|
||||||
{
|
|
||||||
// Round negative overflow towards positive infinity -> most negative finite number
|
|
||||||
return (1 << 15) | CL_HALF_MAX_FINITE_MAG;
|
|
||||||
}
|
|
||||||
else if (rounding_mode == CL_HALF_RTN && !sign)
|
|
||||||
{
|
|
||||||
// Round positive overflow towards negative infinity -> largest finite number
|
|
||||||
return CL_HALF_MAX_FINITE_MAG;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Overflow to infinity
|
|
||||||
return (sign << 15) | CL_HALF_EXP_MASK;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Utility to deal with values that underflow when converting to half precision.
|
|
||||||
*/
|
|
||||||
static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode,
|
|
||||||
uint16_t sign)
|
|
||||||
{
|
|
||||||
if (rounding_mode == CL_HALF_RTP && !sign)
|
|
||||||
{
|
|
||||||
// Round underflow towards positive infinity -> smallest positive value
|
|
||||||
return (sign << 15) | 1;
|
|
||||||
}
|
|
||||||
else if (rounding_mode == CL_HALF_RTN && sign)
|
|
||||||
{
|
|
||||||
// Round underflow towards negative infinity -> largest negative value
|
|
||||||
return (sign << 15) | 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flush to zero
|
|
||||||
return (sign << 15);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert a cl_float to a cl_half.
|
|
||||||
*/
|
|
||||||
static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode)
|
|
||||||
{
|
|
||||||
// Type-punning to get direct access to underlying bits
|
|
||||||
union
|
|
||||||
{
|
|
||||||
cl_float f;
|
|
||||||
uint32_t i;
|
|
||||||
} f32;
|
|
||||||
f32.f = f;
|
|
||||||
|
|
||||||
// Extract sign bit
|
|
||||||
uint16_t sign = f32.i >> 31;
|
|
||||||
|
|
||||||
// Extract FP32 exponent and mantissa
|
|
||||||
uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF;
|
|
||||||
uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1);
|
|
||||||
|
|
||||||
// Remove FP32 exponent bias
|
|
||||||
int32_t exp = f_exp - CL_FLT_MAX_EXP + 1;
|
|
||||||
|
|
||||||
// Add FP16 exponent bias
|
|
||||||
uint16_t h_exp = exp + CL_HALF_MAX_EXP - 1;
|
|
||||||
|
|
||||||
// Position of the bit that will become the FP16 mantissa LSB
|
|
||||||
uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG;
|
|
||||||
|
|
||||||
// Check for NaN / infinity
|
|
||||||
if (f_exp == 0xFF)
|
|
||||||
{
|
|
||||||
if (f_mant)
|
|
||||||
{
|
|
||||||
// NaN -> propagate mantissa and silence it
|
|
||||||
uint16_t h_mant = f_mant >> lsb_pos;
|
|
||||||
h_mant |= 0x200;
|
|
||||||
return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Infinity -> zero mantissa
|
|
||||||
return (sign << 15) | CL_HALF_EXP_MASK;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for zero
|
|
||||||
if (!f_exp && !f_mant)
|
|
||||||
{
|
|
||||||
return (sign << 15);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for overflow
|
|
||||||
if (exp >= CL_HALF_MAX_EXP)
|
|
||||||
{
|
|
||||||
return cl_half_handle_overflow(rounding_mode, sign);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for underflow
|
|
||||||
if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
|
|
||||||
{
|
|
||||||
return cl_half_handle_underflow(rounding_mode, sign);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for value that will become denormal
|
|
||||||
if (exp < -14)
|
|
||||||
{
|
|
||||||
// Denormal -> include the implicit 1 from the FP32 mantissa
|
|
||||||
h_exp = 0;
|
|
||||||
f_mant |= 1 << (CL_FLT_MANT_DIG - 1);
|
|
||||||
|
|
||||||
// Mantissa shift amount depends on exponent
|
|
||||||
lsb_pos = -exp + (CL_FLT_MANT_DIG - 25);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Generate FP16 mantissa by shifting FP32 mantissa
|
|
||||||
uint16_t h_mant = f_mant >> lsb_pos;
|
|
||||||
|
|
||||||
// Check whether we need to round
|
|
||||||
uint32_t halfway = 1 << (lsb_pos - 1);
|
|
||||||
uint32_t mask = (halfway << 1) - 1;
|
|
||||||
switch (rounding_mode)
|
|
||||||
{
|
|
||||||
case CL_HALF_RTE:
|
|
||||||
if ((f_mant & mask) > halfway)
|
|
||||||
{
|
|
||||||
// More than halfway -> round up
|
|
||||||
h_mant += 1;
|
|
||||||
}
|
|
||||||
else if ((f_mant & mask) == halfway)
|
|
||||||
{
|
|
||||||
// Exactly halfway -> round to nearest even
|
|
||||||
if (h_mant & 0x1)
|
|
||||||
h_mant += 1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case CL_HALF_RTZ:
|
|
||||||
// Mantissa has already been truncated -> do nothing
|
|
||||||
break;
|
|
||||||
case CL_HALF_RTP:
|
|
||||||
if ((f_mant & mask) && !sign)
|
|
||||||
{
|
|
||||||
// Round positive numbers up
|
|
||||||
h_mant += 1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case CL_HALF_RTN:
|
|
||||||
if ((f_mant & mask) && sign)
|
|
||||||
{
|
|
||||||
// Round negative numbers down
|
|
||||||
h_mant += 1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for mantissa overflow
|
|
||||||
if (h_mant & 0x400)
|
|
||||||
{
|
|
||||||
h_exp += 1;
|
|
||||||
h_mant = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (sign << 15) | (h_exp << 10) | h_mant;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert a cl_double to a cl_half.
|
|
||||||
*/
|
|
||||||
static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode)
|
|
||||||
{
|
|
||||||
// Type-punning to get direct access to underlying bits
|
|
||||||
union
|
|
||||||
{
|
|
||||||
cl_double d;
|
|
||||||
uint64_t i;
|
|
||||||
} f64;
|
|
||||||
f64.d = d;
|
|
||||||
|
|
||||||
// Extract sign bit
|
|
||||||
uint16_t sign = f64.i >> 63;
|
|
||||||
|
|
||||||
// Extract FP64 exponent and mantissa
|
|
||||||
uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF;
|
|
||||||
uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1);
|
|
||||||
|
|
||||||
// Remove FP64 exponent bias
|
|
||||||
int64_t exp = d_exp - CL_DBL_MAX_EXP + 1;
|
|
||||||
|
|
||||||
// Add FP16 exponent bias
|
|
||||||
uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
|
|
||||||
|
|
||||||
// Position of the bit that will become the FP16 mantissa LSB
|
|
||||||
uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG;
|
|
||||||
|
|
||||||
// Check for NaN / infinity
|
|
||||||
if (d_exp == 0x7FF)
|
|
||||||
{
|
|
||||||
if (d_mant)
|
|
||||||
{
|
|
||||||
// NaN -> propagate mantissa and silence it
|
|
||||||
uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
|
|
||||||
h_mant |= 0x200;
|
|
||||||
return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Infinity -> zero mantissa
|
|
||||||
return (sign << 15) | CL_HALF_EXP_MASK;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for zero
|
|
||||||
if (!d_exp && !d_mant)
|
|
||||||
{
|
|
||||||
return (sign << 15);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for overflow
|
|
||||||
if (exp >= CL_HALF_MAX_EXP)
|
|
||||||
{
|
|
||||||
return cl_half_handle_overflow(rounding_mode, sign);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for underflow
|
|
||||||
if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
|
|
||||||
{
|
|
||||||
return cl_half_handle_underflow(rounding_mode, sign);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for value that will become denormal
|
|
||||||
if (exp < -14)
|
|
||||||
{
|
|
||||||
// Include the implicit 1 from the FP64 mantissa
|
|
||||||
h_exp = 0;
|
|
||||||
d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1);
|
|
||||||
|
|
||||||
// Mantissa shift amount depends on exponent
|
|
||||||
lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Generate FP16 mantissa by shifting FP64 mantissa
|
|
||||||
uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
|
|
||||||
|
|
||||||
// Check whether we need to round
|
|
||||||
uint64_t halfway = (uint64_t)1 << (lsb_pos - 1);
|
|
||||||
uint64_t mask = (halfway << 1) - 1;
|
|
||||||
switch (rounding_mode)
|
|
||||||
{
|
|
||||||
case CL_HALF_RTE:
|
|
||||||
if ((d_mant & mask) > halfway)
|
|
||||||
{
|
|
||||||
// More than halfway -> round up
|
|
||||||
h_mant += 1;
|
|
||||||
}
|
|
||||||
else if ((d_mant & mask) == halfway)
|
|
||||||
{
|
|
||||||
// Exactly halfway -> round to nearest even
|
|
||||||
if (h_mant & 0x1)
|
|
||||||
h_mant += 1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case CL_HALF_RTZ:
|
|
||||||
// Mantissa has already been truncated -> do nothing
|
|
||||||
break;
|
|
||||||
case CL_HALF_RTP:
|
|
||||||
if ((d_mant & mask) && !sign)
|
|
||||||
{
|
|
||||||
// Round positive numbers up
|
|
||||||
h_mant += 1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case CL_HALF_RTN:
|
|
||||||
if ((d_mant & mask) && sign)
|
|
||||||
{
|
|
||||||
// Round negative numbers down
|
|
||||||
h_mant += 1;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for mantissa overflow
|
|
||||||
if (h_mant & 0x400)
|
|
||||||
{
|
|
||||||
h_exp += 1;
|
|
||||||
h_mant = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (sign << 15) | (h_exp << 10) | h_mant;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert a cl_half to a cl_float.
|
|
||||||
*/
|
|
||||||
static inline cl_float cl_half_to_float(cl_half h)
|
|
||||||
{
|
|
||||||
// Type-punning to get direct access to underlying bits
|
|
||||||
union
|
|
||||||
{
|
|
||||||
cl_float f;
|
|
||||||
uint32_t i;
|
|
||||||
} f32;
|
|
||||||
|
|
||||||
// Extract sign bit
|
|
||||||
uint16_t sign = h >> 15;
|
|
||||||
|
|
||||||
// Extract FP16 exponent and mantissa
|
|
||||||
uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
|
|
||||||
uint16_t h_mant = h & 0x3FF;
|
|
||||||
|
|
||||||
// Remove FP16 exponent bias
|
|
||||||
int32_t exp = h_exp - CL_HALF_MAX_EXP + 1;
|
|
||||||
|
|
||||||
// Add FP32 exponent bias
|
|
||||||
uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1;
|
|
||||||
|
|
||||||
// Check for NaN / infinity
|
|
||||||
if (h_exp == 0x1F)
|
|
||||||
{
|
|
||||||
if (h_mant)
|
|
||||||
{
|
|
||||||
// NaN -> propagate mantissa and silence it
|
|
||||||
uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);
|
|
||||||
f_mant |= 0x400000;
|
|
||||||
f32.i = (sign << 31) | 0x7F800000 | f_mant;
|
|
||||||
return f32.f;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Infinity -> zero mantissa
|
|
||||||
f32.i = (sign << 31) | 0x7F800000;
|
|
||||||
return f32.f;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for zero / denormal
|
|
||||||
if (h_exp == 0)
|
|
||||||
{
|
|
||||||
if (h_mant == 0)
|
|
||||||
{
|
|
||||||
// Zero -> zero exponent
|
|
||||||
f_exp = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Denormal -> normalize it
|
|
||||||
// - Shift mantissa to make most-significant 1 implicit
|
|
||||||
// - Adjust exponent accordingly
|
|
||||||
uint32_t shift = 0;
|
|
||||||
while ((h_mant & 0x400) == 0)
|
|
||||||
{
|
|
||||||
h_mant <<= 1;
|
|
||||||
shift++;
|
|
||||||
}
|
|
||||||
h_mant &= 0x3FF;
|
|
||||||
f_exp -= shift - 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13);
|
|
||||||
return f32.f;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#undef CL_HALF_EXP_MASK
|
|
||||||
#undef CL_HALF_MAX_FINITE_MAG
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* OPENCL_CL_HALF_H */
|
|
||||||
1287
CL/cl_icd.h
1287
CL/cl_icd.h
File diff suppressed because it is too large
Load Diff
1384
CL/cl_platform.h
1384
CL/cl_platform.h
File diff suppressed because it is too large
Load Diff
@ -1,160 +0,0 @@
|
|||||||
/*******************************************************************************
|
|
||||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
/*****************************************************************************\
|
|
||||||
|
|
||||||
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
|
|
||||||
|
|
||||||
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
|
|
||||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
|
|
||||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
|
|
||||||
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
File Name: cl_va_api_media_sharing_intel.h
|
|
||||||
|
|
||||||
Abstract:
|
|
||||||
|
|
||||||
Notes:
|
|
||||||
|
|
||||||
\*****************************************************************************/
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
|
|
||||||
#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
|
|
||||||
|
|
||||||
#include <CL/cl.h>
|
|
||||||
#include <CL/cl_platform.h>
|
|
||||||
#include <va/va.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/******************************************
|
|
||||||
* cl_intel_va_api_media_sharing extension *
|
|
||||||
*******************************************/
|
|
||||||
|
|
||||||
#define cl_intel_va_api_media_sharing 1
|
|
||||||
|
|
||||||
/* error codes */
|
|
||||||
#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098
|
|
||||||
#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099
|
|
||||||
#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100
|
|
||||||
#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101
|
|
||||||
|
|
||||||
/* cl_va_api_device_source_intel */
|
|
||||||
#define CL_VA_API_DISPLAY_INTEL 0x4094
|
|
||||||
|
|
||||||
/* cl_va_api_device_set_intel */
|
|
||||||
#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095
|
|
||||||
#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096
|
|
||||||
|
|
||||||
/* cl_context_info */
|
|
||||||
#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097
|
|
||||||
|
|
||||||
/* cl_mem_info */
|
|
||||||
#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098
|
|
||||||
|
|
||||||
/* cl_image_info */
|
|
||||||
#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099
|
|
||||||
|
|
||||||
/* cl_command_type */
|
|
||||||
#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A
|
|
||||||
#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B
|
|
||||||
|
|
||||||
typedef cl_uint cl_va_api_device_source_intel;
|
|
||||||
typedef cl_uint cl_va_api_device_set_intel;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
|
|
||||||
cl_platform_id platform,
|
|
||||||
cl_va_api_device_source_intel media_adapter_type,
|
|
||||||
void* media_adapter,
|
|
||||||
cl_va_api_device_set_intel media_adapter_set,
|
|
||||||
cl_uint num_entries,
|
|
||||||
cl_device_id* devices,
|
|
||||||
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)(
|
|
||||||
cl_platform_id platform,
|
|
||||||
cl_va_api_device_source_intel media_adapter_type,
|
|
||||||
void* media_adapter,
|
|
||||||
cl_va_api_device_set_intel media_adapter_set,
|
|
||||||
cl_uint num_entries,
|
|
||||||
cl_device_id* devices,
|
|
||||||
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_mem CL_API_CALL
|
|
||||||
clCreateFromVA_APIMediaSurfaceINTEL(
|
|
||||||
cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
VASurfaceID* surface,
|
|
||||||
cl_uint plane,
|
|
||||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)(
|
|
||||||
cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
VASurfaceID* surface,
|
|
||||||
cl_uint plane,
|
|
||||||
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueAcquireVA_APIMediaSurfacesINTEL(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem* mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem* mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
extern CL_API_ENTRY cl_int CL_API_CALL
|
|
||||||
clEnqueueReleaseVA_APIMediaSurfacesINTEL(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem* mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)(
|
|
||||||
cl_command_queue command_queue,
|
|
||||||
cl_uint num_objects,
|
|
||||||
const cl_mem* mem_objects,
|
|
||||||
cl_uint num_events_in_wait_list,
|
|
||||||
const cl_event* event_wait_list,
|
|
||||||
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */
|
|
||||||
|
|
||||||
@ -1,81 +0,0 @@
|
|||||||
/*******************************************************************************
|
|
||||||
* Copyright (c) 2018-2020 The Khronos Group Inc.
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
|
|
||||||
#ifndef __CL_VERSION_H
|
|
||||||
#define __CL_VERSION_H
|
|
||||||
|
|
||||||
/* Detect which version to target.
 *
 * If the including code did not choose a version, fall back to 220 and say so
 * at compile time. */
#if !defined(CL_TARGET_OPENCL_VERSION)
#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)")
#define CL_TARGET_OPENCL_VERSION 220
#endif

/* Any value outside the known set is replaced by 220 as well; the diagnostic
 * uses the same "cl_version.h:" prefix as the one above. */
#if CL_TARGET_OPENCL_VERSION != 100 && \
    CL_TARGET_OPENCL_VERSION != 110 && \
    CL_TARGET_OPENCL_VERSION != 120 && \
    CL_TARGET_OPENCL_VERSION != 200 && \
    CL_TARGET_OPENCL_VERSION != 210 && \
    CL_TARGET_OPENCL_VERSION != 220 && \
    CL_TARGET_OPENCL_VERSION != 300
#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220, 300). Defaulting to 220 (OpenCL 2.2)")
#undef CL_TARGET_OPENCL_VERSION
#define CL_TARGET_OPENCL_VERSION 220
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/* OpenCL Version */
|
|
||||||
#if CL_TARGET_OPENCL_VERSION >= 300 && !defined(CL_VERSION_3_0)
|
|
||||||
#define CL_VERSION_3_0 1
|
|
||||||
#endif
|
|
||||||
#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2)
|
|
||||||
#define CL_VERSION_2_2 1
|
|
||||||
#endif
|
|
||||||
#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1)
|
|
||||||
#define CL_VERSION_2_1 1
|
|
||||||
#endif
|
|
||||||
#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0)
|
|
||||||
#define CL_VERSION_2_0 1
|
|
||||||
#endif
|
|
||||||
#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2)
|
|
||||||
#define CL_VERSION_1_2 1
|
|
||||||
#endif
|
|
||||||
#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1)
|
|
||||||
#define CL_VERSION_1_1 1
|
|
||||||
#endif
|
|
||||||
#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0)
|
|
||||||
#define CL_VERSION_1_0 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Allow deprecated APIs for older OpenCL versions. */
|
|
||||||
#if CL_TARGET_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS)
|
|
||||||
#define CL_USE_DEPRECATED_OPENCL_2_2_APIS
|
|
||||||
#endif
|
|
||||||
#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS)
|
|
||||||
#define CL_USE_DEPRECATED_OPENCL_2_1_APIS
|
|
||||||
#endif
|
|
||||||
#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
|
|
||||||
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
|
||||||
#endif
|
|
||||||
#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
|
|
||||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
|
||||||
#endif
|
|
||||||
#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
|
|
||||||
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
|
||||||
#endif
|
|
||||||
#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS)
|
|
||||||
#define CL_USE_DEPRECATED_OPENCL_1_0_APIS
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* __CL_VERSION_H */
|
|
||||||
33
CL/opencl.h
33
CL/opencl.h
@ -1,33 +0,0 @@
|
|||||||
/*******************************************************************************
|
|
||||||
* Copyright (c) 2008-2020 The Khronos Group Inc.
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
******************************************************************************/
|
|
||||||
|
|
||||||
#ifndef __OPENCL_H
|
|
||||||
#define __OPENCL_H
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <CL/cl.h>
|
|
||||||
#include <CL/cl_gl.h>
|
|
||||||
#include <CL/cl_gl_ext.h>
|
|
||||||
#include <CL/cl_ext.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* __OPENCL_H */
|
|
||||||
40
CUDA.make
40
CUDA.make
@ -1,40 +0,0 @@
|
|||||||
CXXFLAGS=-O3
|
|
||||||
CUDA_CXX=nvcc
|
|
||||||
|
|
||||||
|
|
||||||
ifndef NVARCH
|
|
||||||
define nvarch_help
|
|
||||||
Set NVARCH to select sm_?? version.
|
|
||||||
Default: sm_60
|
|
||||||
|
|
||||||
endef
|
|
||||||
$(info $(nvarch_help))
|
|
||||||
NVARCH=sm_60
|
|
||||||
endif
|
|
||||||
|
|
||||||
|
|
||||||
ifndef MEM
|
|
||||||
define mem_help
|
|
||||||
Set MEM to select memory mode.
|
|
||||||
Available options:
|
|
||||||
DEFAULT - allocate host and device memory pointers.
|
|
||||||
MANAGED - use CUDA Managed Memory.
|
|
||||||
PAGEFAULT - shared memory, only host pointers allocated.
|
|
||||||
|
|
||||||
endef
|
|
||||||
$(info $(mem_help))
|
|
||||||
MEM=DEFAULT
|
|
||||||
endif
|
|
||||||
|
|
||||||
MEM_MANAGED= -DMANAGED
|
|
||||||
MEM_PAGEFAULT= -DPAGEFAULT
|
|
||||||
MEM_MODE = $(MEM_$(MEM))
|
|
||||||
|
|
||||||
|
|
||||||
cuda-stream: main.cpp CUDAStream.cu
|
|
||||||
$(CUDA_CXX) -std=c++11 $(CXXFLAGS) -arch=$(NVARCH) $(MEM_MODE) -DCUDA $^ $(EXTRA_FLAGS) -o $@
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean:
|
|
||||||
rm -f cuda-stream
|
|
||||||
|
|
||||||
11
HIP.make
11
HIP.make
@ -1,11 +0,0 @@
|
|||||||
|
|
||||||
HIP_PATH?= /opt/rocm/hip
|
|
||||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
|
||||||
|
|
||||||
hip-stream: main.cpp HIPStream.cpp
|
|
||||||
$(HIPCC) $(CXXFLAGS) -O3 -std=c++11 -DHIP $^ $(EXTRA_FLAGS) -o $@
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean:
|
|
||||||
rm -f hip-stream
|
|
||||||
|
|
||||||
98
Kokkos.make
98
Kokkos.make
@ -1,98 +0,0 @@
|
|||||||
|
|
||||||
default: kokkos-stream
|
|
||||||
|
|
||||||
ifndef DEVICE
|
|
||||||
define device_help
|
|
||||||
Set DEVICE to change flags (defaulting to OpenMP).
|
|
||||||
Available devices are:
|
|
||||||
OpenMP, Serial, Pthreads, Cuda, HIP
|
|
||||||
|
|
||||||
endef
|
|
||||||
$(info $(device_help))
|
|
||||||
DEVICE="OpenMP"
|
|
||||||
endif
|
|
||||||
KOKKOS_DEVICES="$(DEVICE)"
|
|
||||||
|
|
||||||
ifndef ARCH
|
|
||||||
define arch_help
|
|
||||||
Set ARCH to change flags (defaulting to empty).
|
|
||||||
Available architectures are:
|
|
||||||
AMDAVX
|
|
||||||
ARMv80 ARMv81 ARMv8-ThunderX
|
|
||||||
BGQ Power7 Power8 Power9
|
|
||||||
WSM SNB HSW BDW SKX KNC KNL
|
|
||||||
Kepler30 Kepler32 Kepler35 Kepler37
|
|
||||||
Maxwell50 Maxwell52 Maxwell53
|
|
||||||
Pascal60 Pascal61
|
|
||||||
Volta70 Volta72
|
|
||||||
|
|
||||||
endef
|
|
||||||
$(info $(arch_help))
|
|
||||||
ARCH=""
|
|
||||||
endif
|
|
||||||
KOKKOS_ARCH="$(ARCH)"
|
|
||||||
|
|
||||||
ifndef COMPILER
|
|
||||||
define compiler_help
|
|
||||||
Set COMPILER to change flags (defaulting to GNU).
|
|
||||||
Available compilers are:
|
|
||||||
GNU INTEL CRAY PGI ARMCLANG HIPCC
|
|
||||||
|
|
||||||
Note: you may have to do `export CXX=\path\to\hipcc` in case Kokkos detects the wrong compiler
|
|
||||||
|
|
||||||
endef
|
|
||||||
$(info $(compiler_help))
|
|
||||||
COMPILER=GNU
|
|
||||||
endif
|
|
||||||
|
|
||||||
COMPILER_ARMCLANG = armclang++
|
|
||||||
COMPILER_HIPCC = hipcc
|
|
||||||
COMPILER_GNU = g++
|
|
||||||
COMPILER_INTEL = icpc -qopt-streaming-stores=always
|
|
||||||
COMPILER_CRAY = CC
|
|
||||||
COMPILER_PGI = pgc++
|
|
||||||
CXX = $(COMPILER_$(COMPILER))
|
|
||||||
|
|
||||||
ifndef TARGET
|
|
||||||
define target_help
|
|
||||||
Set TARGET to change to offload device. Defaulting to CPU.
|
|
||||||
Available targets are:
|
|
||||||
CPU (default)
|
|
||||||
GPU
|
|
||||||
|
|
||||||
endef
|
|
||||||
$(info $(target_help))
|
|
||||||
TARGET=CPU
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET), GPU)
|
|
||||||
ifneq ($(COMPILER), HIPCC)
|
|
||||||
CXX = $(NVCC_WRAPPER)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
OBJ = main.o KokkosStream.o
|
|
||||||
CXXFLAGS = -O3
|
|
||||||
LINKFLAGS = # empty for now
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
ifeq ($(COMPILER), GNU)
|
|
||||||
ifeq ($(DEVICE), OpenMP)
|
|
||||||
CXXFLAGS += -fopenmp
|
|
||||||
LINKFLAGS += -fopenmp
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
|
||||||
|
|
||||||
kokkos-stream: $(OBJ) $(KOKKOS_LINK_DEPENDS)
|
|
||||||
$(CXX) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -DKOKKOS -o $@
|
|
||||||
|
|
||||||
%.o: %.cpp $(KOKKOS_CPP_DEPENDS)
|
|
||||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -DKOKKOS -c $<
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean:
|
|
||||||
rm -f kokkos-stream main.o KokkosStream.o Kokkos_*.o
|
|
||||||
|
|
||||||
58
OpenACC.make
58
OpenACC.make
@ -1,58 +0,0 @@
|
|||||||
|
|
||||||
ifndef COMPILER
|
|
||||||
define compiler_help
|
|
||||||
Set COMPILER to ensure correct flags are set.
|
|
||||||
Available compilers are:
|
|
||||||
PGI GNU
|
|
||||||
endef
|
|
||||||
$(info $(compiler_help))
|
|
||||||
endif
|
|
||||||
|
|
||||||
COMPILER_ = $(CXX)
|
|
||||||
COMPILER_PGI = pgc++
|
|
||||||
COMPILER_GNU = g++
|
|
||||||
|
|
||||||
FLAGS_ = -O3 -std=c++11
|
|
||||||
|
|
||||||
FLAGS_PGI = -std=c++11 -O3 -acc
|
|
||||||
ifeq ($(COMPILER), PGI)
|
|
||||||
define target_help
|
|
||||||
Set a TARGET to ensure PGI targets the correct offload device.
|
|
||||||
Available targets are:
|
|
||||||
SNB, IVB, HSW, SKL, KNL
|
|
||||||
PWR9, AMD
|
|
||||||
KEPLER, MAXWELL, PASCAL, VOLTA
|
|
||||||
HAWAII
|
|
||||||
endef
|
|
||||||
ifndef TARGET
|
|
||||||
$(error $(target_help))
|
|
||||||
endif
|
|
||||||
TARGET_FLAGS_SNB = -ta=multicore -tp=sandybridge
|
|
||||||
TARGET_FLAGS_IVB = -ta=multicore -tp=ivybridge
|
|
||||||
TARGET_FLAGS_HSW = -ta=multicore -tp=haswell
|
|
||||||
TARGET_FLAGS_SKL = -ta=multicore -tp=skylake
|
|
||||||
TARGET_FLAGS_KNL = -ta=multicore -tp=knl
|
|
||||||
TARGET_FLAGS_PWR9 = -ta=multicore -tp=pwr9
|
|
||||||
TARGET_FLAGS_AMD = -ta=multicore -tp=zen
|
|
||||||
TARGET_FLAGS_KEPLER = -ta=nvidia:cc35
|
|
||||||
TARGET_FLAGS_MAXWELL = -ta=nvidia:cc50
|
|
||||||
TARGET_FLAGS_PASCAL = -ta=nvidia:cc60
|
|
||||||
TARGET_FLAGS_VOLTA = -ta=nvidia:cc70
|
|
||||||
TARGET_FLAGS_HAWAII = -ta=radeon:hawaii
|
|
||||||
ifeq ($(TARGET_FLAGS_$(TARGET)),)
|
|
||||||
$(error $(target_help))
|
|
||||||
endif
|
|
||||||
|
|
||||||
FLAGS_PGI += $(TARGET_FLAGS_$(TARGET))
|
|
||||||
|
|
||||||
endif
|
|
||||||
|
|
||||||
FLAGS_GNU = -O3 -std=c++11 -Drestrict=__restrict -fopenacc
|
|
||||||
CXXFLAGS = $(FLAGS_$(COMPILER))
|
|
||||||
|
|
||||||
acc-stream: main.cpp ACCStream.cpp
|
|
||||||
$(COMPILER_$(COMPILER)) $(CXXFLAGS) -DACC $^ $(EXTRA_FLAGS) -o $@
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean:
|
|
||||||
rm -f acc-stream main.o ACCStream.o
|
|
||||||
39
OpenCL.make
39
OpenCL.make
@ -1,39 +0,0 @@
|
|||||||
|
|
||||||
ifndef COMPILER
|
|
||||||
define compiler_help
|
|
||||||
Set COMPILER to change flags (defaulting to GNU).
|
|
||||||
Available compilers are:
|
|
||||||
GNU CLANG INTEL CRAY
|
|
||||||
|
|
||||||
endef
|
|
||||||
$(info $(compiler_help))
|
|
||||||
COMPILER=GNU
|
|
||||||
endif
|
|
||||||
|
|
||||||
COMPILER_GNU = g++
|
|
||||||
COMPILER_CLANG = clang++
|
|
||||||
COMPILER_INTEL = icpc
|
|
||||||
COMPILER_CRAY = CC
|
|
||||||
CXX = $(COMPILER_$(COMPILER))
|
|
||||||
|
|
||||||
FLAGS_ = -O3 -std=c++11
|
|
||||||
FLAGS_GNU = -O3 -std=c++11
|
|
||||||
FLAGS_CLANG = -O3 -std=c++11
|
|
||||||
FLAGS_INTEL = -O3 -std=c++11
|
|
||||||
FLAGS_CRAY = -O3 -hstd=c++11
|
|
||||||
CXXFLAGS=$(FLAGS_$(COMPILER))
|
|
||||||
|
|
||||||
PLATFORM = $(shell uname -s)
|
|
||||||
ifeq ($(PLATFORM), Darwin)
|
|
||||||
LIBS = -framework OpenCL
|
|
||||||
else
|
|
||||||
LIBS = -lOpenCL
|
|
||||||
endif
|
|
||||||
|
|
||||||
ocl-stream: main.cpp OCLStream.cpp
|
|
||||||
$(CXX) $(CXXFLAGS) -DOCL $^ $(EXTRA_FLAGS) $(LIBS) -o $@
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean:
|
|
||||||
rm -f ocl-stream
|
|
||||||
|
|
||||||
103
OpenMP.make
103
OpenMP.make
@ -1,103 +0,0 @@
|
|||||||
|
|
||||||
ifndef COMPILER
|
|
||||||
define compiler_help
|
|
||||||
Set COMPILER to change flags (defaulting to GNU).
|
|
||||||
Available compilers are:
|
|
||||||
CLANG CRAY GNU GNU_PPC INTEL XL PGI
|
|
||||||
NEC ARMCLANG AOMP FUJITSU
|
|
||||||
|
|
||||||
Note: GCC on PPC requires -mcpu=native instead of -march=native so we have a special case for it
|
|
||||||
|
|
||||||
endef
|
|
||||||
$(info $(compiler_help))
|
|
||||||
COMPILER=GNU
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifndef TARGET
|
|
||||||
define target_help
|
|
||||||
Set TARGET to change device (defaulting to CPU).
|
|
||||||
Available targets are:
|
|
||||||
CPU NVIDIA AMD INTEL_GPU
|
|
||||||
|
|
||||||
endef
|
|
||||||
$(info $(target_help))
|
|
||||||
TARGET=CPU
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ("$(COMPILER)", "CLANG")
|
|
||||||
ifdef TARGET
|
|
||||||
ifeq ("$(TARGET)", "NVIDIA")
|
|
||||||
ifndef NVARCH
|
|
||||||
define nvarch_help
|
|
||||||
Set NVARCH to select sm_?? version.
|
|
||||||
Default: sm_60
|
|
||||||
|
|
||||||
endef
|
|
||||||
$(info $(nvarch_help))
|
|
||||||
NVARCH=sm_60
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
COMPILER_ARMCLANG = armclang++
|
|
||||||
COMPILER_GNU = g++
|
|
||||||
COMPILER_GNU_PPC = g++
|
|
||||||
COMPILER_INTEL = icpc
|
|
||||||
COMPILER_CRAY = CC
|
|
||||||
COMPILER_CLANG = clang++
|
|
||||||
COMPILER_XL = xlc++
|
|
||||||
COMPILER_PGI = pgc++
|
|
||||||
COMPILER_NEC = /opt/nec/ve/bin/nc++
|
|
||||||
COMPILER_AOMP = clang++
|
|
||||||
COMPILER_FUJITSU=FCC
|
|
||||||
CXX = $(COMPILER_$(COMPILER))
|
|
||||||
|
|
||||||
FLAGS_GNU = -O3 -std=c++11 -march=native
|
|
||||||
FLAGS_GNU_PPC = -O3 -std=c++11 -mcpu=native
|
|
||||||
FLAGS_INTEL = -O3 -std=c++11
|
|
||||||
FLAGS_CRAY = -O3 -std=c++11
|
|
||||||
FLAGS_CLANG = -O3 -std=c++11
|
|
||||||
FLAGS_XL = -O5 -qarch=auto -qtune=auto -std=c++11
|
|
||||||
FLAGS_PGI = -O3 -std=c++11
|
|
||||||
FLAGS_NEC = -O4 -finline -std=c++11
|
|
||||||
FLAGS_ARMCLANG = -O3 -std=c++11
|
|
||||||
FLAGS_AOMP = -O3 -std=c++11
|
|
||||||
FLAGS_FUJITSU=-Kfast -std=c++11 -KA64FX -KSVE -KARMV8_3_A -Kzfill=100 -Kprefetch_sequential=soft -Kprefetch_line=8 -Kprefetch_line_L2=16
|
|
||||||
CXXFLAGS = $(FLAGS_$(COMPILER))
|
|
||||||
|
|
||||||
# OpenMP flags for CPUs
|
|
||||||
OMP_ARMCLANG_CPU = -fopenmp
|
|
||||||
OMP_GNU_CPU = -fopenmp
|
|
||||||
OMP_GNU_PPC_CPU = -fopenmp
|
|
||||||
OMP_INTEL_CPU = -qopenmp
|
|
||||||
OMP_CRAY_CPU = -fopenmp
|
|
||||||
OMP_CLANG_CPU = -fopenmp=libomp
|
|
||||||
OMP_XL_CPU = -qsmp=omp -qthreaded
|
|
||||||
OMP_PGI_CPU = -mp
|
|
||||||
OMP_NEC_CPU = -fopenmp
|
|
||||||
OMP_FUJITSU_CPU=-Kopenmp
|
|
||||||
|
|
||||||
# OpenMP flags for NVIDIA
|
|
||||||
OMP_CRAY_NVIDIA = -DOMP_TARGET_GPU
|
|
||||||
OMP_CLANG_NVIDIA = -DOMP_TARGET_GPU -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=$(NVARCH)
|
|
||||||
OMP_GNU_NVIDIA = -DOMP_TARGET_GPU -fopenmp -foffload=nvptx-none
|
|
||||||
OMP_GNU_AMD = -DOMP_TARGET_GPU -fopenmp -foffload=amdgcn-amdhsa
|
|
||||||
|
|
||||||
OMP_INTEL_CPU = -xHOST -qopt-streaming-stores=always -qopenmp
|
|
||||||
OMP_INTEL_INTEL_GPU = -DOMP_TARGET_GPU -qnextgen -fiopenmp -fopenmp-targets=spir64
|
|
||||||
|
|
||||||
OMP_AOMP_GPU = -DOMP_TARGET_GPU -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906
|
|
||||||
|
|
||||||
ifndef OMP_$(COMPILER)_$(TARGET)
|
|
||||||
$(error Targeting $(TARGET) with $(COMPILER) not supported)
|
|
||||||
endif
|
|
||||||
|
|
||||||
OMP = $(OMP_$(COMPILER)_$(TARGET))
|
|
||||||
|
|
||||||
omp-stream: main.cpp OMPStream.cpp
|
|
||||||
$(CXX) $(CXXFLAGS) -DOMP $^ $(OMP) $(EXTRA_FLAGS) -o $@
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean:
|
|
||||||
rm -f omp-stream
|
|
||||||
58
RAJA.make
58
RAJA.make
@ -1,58 +0,0 @@
|
|||||||
|
|
||||||
ifndef TARGET
|
|
||||||
define target_help
|
|
||||||
Set TARGET to change to offload device. Defaulting to CPU.
|
|
||||||
Available targets are:
|
|
||||||
CPU (default)
|
|
||||||
GPU
|
|
||||||
endef
|
|
||||||
$(info $(target_help))
|
|
||||||
TARGET=CPU
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET), CPU)
|
|
||||||
|
|
||||||
ifndef COMPILER
|
|
||||||
define compiler_help
|
|
||||||
Set COMPILER to change flags (defaulting to GNU).
|
|
||||||
Available compilers are:
|
|
||||||
INTEL GNU CRAY XL
|
|
||||||
endef
|
|
||||||
$(info $(compiler_help))
|
|
||||||
COMPILER=GNU
|
|
||||||
endif
|
|
||||||
|
|
||||||
CXX_INTEL = icpc
|
|
||||||
CXX_GNU = g++
|
|
||||||
CXX_CRAY = CC
|
|
||||||
CXX_XL = xlc++
|
|
||||||
|
|
||||||
CXXFLAGS_INTEL = -O3 -std=c++11 -qopenmp -xHost -qopt-streaming-stores=always
|
|
||||||
CXXFLAGS_GNU = -O3 -std=c++11 -fopenmp
|
|
||||||
CXXFLAGS_CRAY = -O3 -hstd=c++11
|
|
||||||
CXXFLAGS_XL = -O5 -std=c++11 -qarch=pwr8 -qtune=pwr8 -qsmp=omp -qthreaded
|
|
||||||
|
|
||||||
CXX = $(CXX_$(COMPILER))
|
|
||||||
CXXFLAGS = -DRAJA_TARGET_CPU $(CXXFLAGS_$(COMPILER))
|
|
||||||
|
|
||||||
else ifeq ($(TARGET), GPU)
|
|
||||||
CXX = nvcc
|
|
||||||
|
|
||||||
ifndef ARCH
|
|
||||||
define arch_help
|
|
||||||
Set ARCH to ensure correct GPU architecture.
|
|
||||||
Example:
|
|
||||||
ARCH=sm_35
|
|
||||||
endef
|
|
||||||
$(error $(arch_help))
|
|
||||||
endif
|
|
||||||
CXXFLAGS = --expt-extended-lambda -O3 -std=c++11 -x cu -Xcompiler -fopenmp -arch $(ARCH)
|
|
||||||
endif
|
|
||||||
|
|
||||||
raja-stream: main.cpp RAJAStream.cpp
|
|
||||||
$(CXX) $(CXXFLAGS) -DUSE_RAJA -I$(RAJA_PATH)/include $^ $(EXTRA_FLAGS) -L$(RAJA_PATH)/lib -lRAJA -o $@
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean:
|
|
||||||
rm -f raja-stream
|
|
||||||
|
|
||||||
@ -1,36 +0,0 @@
|
|||||||
Android (outdated instructions)
|
|
||||||
------------------
|
|
||||||
|
|
||||||
Assuming you have a recent Android NDK available, you can use the
|
|
||||||
toolchain that it provides to build GPU-STREAM. You should first
|
|
||||||
use the NDK to generate a standalone toolchain:
|
|
||||||
|
|
||||||
# Select a directory to install the toolchain to
|
|
||||||
ANDROID_NATIVE_TOOLCHAIN=/path/to/toolchain
|
|
||||||
|
|
||||||
${NDK}/build/tools/make-standalone-toolchain.sh \
|
|
||||||
--platform=android-14 \
|
|
||||||
--toolchain=arm-linux-androideabi-4.8 \
|
|
||||||
--install-dir=${ANDROID_NATIVE_TOOLCHAIN}
|
|
||||||
|
|
||||||
Make sure that the OpenCL headers and library (libOpenCL.so) are
|
|
||||||
available in `${ANDROID_NATIVE_TOOLCHAIN}/sysroot/usr/`.
|
|
||||||
|
|
||||||
You should then be able to build GPU-STREAM:
|
|
||||||
|
|
||||||
make CXX=${ANDROID_NATIVE_TOOLCHAIN}/bin/arm-linux-androideabi-g++
|
|
||||||
|
|
||||||
Copy the executable and OpenCL kernels to the device:
|
|
||||||
|
|
||||||
adb push gpu-stream-ocl /data/local/tmp
|
|
||||||
adb push ocl-stream-kernels.cl /data/local/tmp
|
|
||||||
|
|
||||||
Run GPU-STREAM from an adb shell:
|
|
||||||
|
|
||||||
adb shell
|
|
||||||
cd /data/local/tmp
|
|
||||||
|
|
||||||
# Use float if device doesn't support double, and reduce array size
|
|
||||||
./gpu-stream-ocl --float -n 6 -s 10000000
|
|
||||||
|
|
||||||
|
|
||||||
57
README.md
57
README.md
@ -13,6 +13,7 @@ There are multiple implementations of this benchmark in a variety of programming
|
|||||||
Currently implemented are:
|
Currently implemented are:
|
||||||
- OpenCL
|
- OpenCL
|
||||||
- CUDA
|
- CUDA
|
||||||
|
- HIP
|
||||||
- OpenACC
|
- OpenACC
|
||||||
- OpenMP 3 and 4.5
|
- OpenMP 3 and 4.5
|
||||||
- C++ Parallel STL
|
- C++ Parallel STL
|
||||||
@ -20,11 +21,14 @@ Currently implemented are:
|
|||||||
- RAJA
|
- RAJA
|
||||||
- SYCL
|
- SYCL
|
||||||
- TBB
|
- TBB
|
||||||
|
- Thrust (via CUDA or HIP)
|
||||||
|
|
||||||
This code was previously called GPU-STREAM.
|
This code was previously called GPU-STREAM.
|
||||||
|
|
||||||
This project also contains implementations in alternative languages with different build systems:
|
This project also contains implementations in alternative languages with different build systems:
|
||||||
* Scala - [scala-stream](./scala-stream)
|
* Julia - [JuliaStream.jl](./src/julia/JuliaStream.jl)
|
||||||
|
* Java - [java-stream](./src/java/java-stream)
|
||||||
|
* Scala - [scala-stream](./src/scala/scala-stream)
|
||||||
|
|
||||||
How is this different to STREAM?
|
How is this different to STREAM?
|
||||||
--------------------------------
|
--------------------------------
|
||||||
@ -66,20 +70,22 @@ The project supports building with CMake >= 3.13.0, it can be installed without
|
|||||||
As with any CMake project, first configure the project:
|
As with any CMake project, first configure the project:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
> cd babelstream
|
> cd babelstream/src
|
||||||
> cmake -Bbuild -H. -DMODEL=<model> <model specific flags prefixed with -D...> # configure the build, build type defaults to Release
|
> cmake -Bbuild -H. -DMODEL=<model> <model specific flags prefixed with -D...> # configure the build, build type defaults to Release
|
||||||
> cmake --build build # compile it
|
> cmake --build build # compile it
|
||||||
> ./build/babelstream # executable available at ./build/
|
> ./build/<model>-stream # executable available at ./build/
|
||||||
```
|
```
|
||||||
|
|
||||||
|
The source for each model's implementation is located in `./src/<model>`.
|
||||||
|
|
||||||
By default, we have defined a set of optimal flags for known HPC compilers.
|
By default, we have defined a set of optimal flags for known HPC compilers.
|
||||||
These are assigned to `RELEASE_FLAGS`, and you can override them if required.
|
These are assigned to `RELEASE_FLAGS`, and you can override them if required.
|
||||||
|
|
||||||
To find out what flag each model supports or requires, simply configure while only specifying the model.
|
To find out what flag each model supports or requires, simply configure while only specifying the model.
|
||||||
For example:
|
For example:
|
||||||
```shell
|
```shell
|
||||||
> cd babelstream
|
> cd babelstream/src
|
||||||
> cmake -Bbuild -H. -DMODEL=OCL
|
> cmake -Bbuild -H. -DMODEL=ocl
|
||||||
...
|
...
|
||||||
- Common Release flags are `-O3`, set RELEASE_FLAGS to override
|
- Common Release flags are `-O3`, set RELEASE_FLAGS to override
|
||||||
-- CXX_EXTRA_FLAGS:
|
-- CXX_EXTRA_FLAGS:
|
||||||
@ -93,8 +99,8 @@ For example:
|
|||||||
Use this for linking extra libraries (e.g `-lmylib`, or simply `mylib`)
|
Use this for linking extra libraries (e.g `-lmylib`, or simply `mylib`)
|
||||||
-- CXX_EXTRA_LINKER_FLAGS:
|
-- CXX_EXTRA_LINKER_FLAGS:
|
||||||
Append to linker flags (i.e GCC's `-Wl` or equivalent)
|
Append to linker flags (i.e GCC's `-Wl` or equivalent)
|
||||||
-- Available models: OMP;OCL;STD;STD20;HIP;CUDA;KOKKOS;SYCL;ACC;RAJA;TBB
|
-- Available models: omp;ocl;std;std20;hip;cuda;kokkos;sycl;acc;raja;tbb
|
||||||
-- Selected model : OCL
|
-- Selected model : ocl
|
||||||
-- Supported flags:
|
-- Supported flags:
|
||||||
|
|
||||||
CMAKE_CXX_COMPILER (optional, default=c++): Any CXX compiler that is supported by CMake detection
|
CMAKE_CXX_COMPILER (optional, default=c++): Any CXX compiler that is supported by CMake detection
|
||||||
@ -107,41 +113,10 @@ Alternatively, refer to the [CI script](./ci-test-compile.sh), which test-compil
|
|||||||
|
|
||||||
### GNU Make
|
### GNU Make
|
||||||
|
|
||||||
We have supplied a series of Makefiles, one for each programming model, to assist with building.
|
Support for Make has been removed from 4.0 onwards.
|
||||||
The Makefiles contain common build options, and should be simple to customise for your needs too.
|
However, as the build process only involves a few source files, the required compile commands can be extracted from the CI output.
|
||||||
|
|
||||||
General usage is `make -f <Model>.make`
|
<!-- TODO add CI snippet here -->
|
||||||
Common compiler flags and names can be set by passing a `COMPILER` option to Make, e.g. `make COMPILER=GNU`.
|
|
||||||
Some models allow specifying a CPU or GPU style target, and this can be set by passing a `TARGET` option to Make, e.g. `make TARGET=GPU`.
|
|
||||||
|
|
||||||
Pass in extra flags via the `EXTRA_FLAGS` option.
|
|
||||||
|
|
||||||
The binaries are named in the form `<model>-stream`.
|
|
||||||
|
|
||||||
#### Building Kokkos for Make
|
|
||||||
|
|
||||||
Kokkos version >= 3 requires setting the `KOKKOS_PATH` flag to the *source* directory of a distribution.
|
|
||||||
For example:
|
|
||||||
|
|
||||||
```
|
|
||||||
cd
|
|
||||||
wget https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz
|
|
||||||
tar -xvf 3.1.01.tar.gz # should end up with ~/kokkos-3.1.01
|
|
||||||
cd BabelStream
|
|
||||||
make -f Kokkos.make KOKKOS_PATH=~/kokkos-3.1.01
|
|
||||||
```
|
|
||||||
See make output for more information on supported flags.
|
|
||||||
|
|
||||||
#### Building RAJA for Make
|
|
||||||
|
|
||||||
We use the following command to build RAJA using the Intel Compiler.
|
|
||||||
```
|
|
||||||
cmake .. -DCMAKE_INSTALL_PREFIX=<prefix> -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_COMPILER=icpc -DRAJA_PTR="RAJA_USE_RESTRICT_ALIGNED_PTR" -DCMAKE_BUILD_TYPE=ICCBuild -DRAJA_ENABLE_TESTS=Off
|
|
||||||
```
|
|
||||||
For building with CUDA support, we use the following command.
|
|
||||||
```
|
|
||||||
cmake .. -DCMAKE_INSTALL_PREFIX=<prefix> -DRAJA_PTR="RAJA_USE_RESTRICT_ALIGNED_PTR" -DRAJA_ENABLE_CUDA=1 -DRAJA_ENABLE_TESTS=Off
|
|
||||||
```
|
|
||||||
|
|
||||||
Results
|
Results
|
||||||
-------
|
-------
|
||||||
|
|||||||
14
STD.make
14
STD.make
@ -1,14 +0,0 @@
|
|||||||
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# For full license terms please see the LICENSE file distributed with this
|
|
||||||
# source code
|
|
||||||
|
|
||||||
CXXFLAGS=-O3 -std=c++17 -stdpar -DSTD
|
|
||||||
STD_CXX=nvc++
|
|
||||||
|
|
||||||
std-stream: main.cpp STDStream.cpp
|
|
||||||
$(STD_CXX) $(CXXFLAGS) $^ $(EXTRA_FLAGS) -o $@
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean:
|
|
||||||
rm -f std-stream
|
|
||||||
26
STD20.make
26
STD20.make
@ -1,26 +0,0 @@
|
|||||||
|
|
||||||
ifndef COMPILER
|
|
||||||
define compiler_help
|
|
||||||
Set COMPILER to change flags (defaulting to GNU).
|
|
||||||
Available compilers are:
|
|
||||||
GNU
|
|
||||||
|
|
||||||
endef
|
|
||||||
$(info $(compiler_help))
|
|
||||||
COMPILER=GNU
|
|
||||||
endif
|
|
||||||
|
|
||||||
COMPILER_GNU = g++
|
|
||||||
CXX = $(COMPILER_$(COMPILER))
|
|
||||||
|
|
||||||
FLAGS_GNU = -O3 -std=c++2a -march=native
|
|
||||||
CXXFLAGS = $(FLAGS_$(COMPILER))
|
|
||||||
|
|
||||||
|
|
||||||
std20-stream: main.cpp STD20Stream.cpp
|
|
||||||
$(CXX) -DSTD20 $(CXXFLAGS) $^ $(EXTRA_FLAGS) -o $@
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean:
|
|
||||||
rm -f std20-stream
|
|
||||||
|
|
||||||
81
SYCL.make
81
SYCL.make
@ -1,81 +0,0 @@
|
|||||||
ifndef COMPILER
|
|
||||||
define compiler_help
|
|
||||||
Set COMPILER to change flags (defaulting to GNU).
|
|
||||||
Available compilers are:
|
|
||||||
HIPSYCL, DPCPP, COMPUTECPP
|
|
||||||
|
|
||||||
|
|
||||||
For HIPSYCL and COMPUTECPP, SYCL_SDK_DIR must be specified, the directory should contain [/lib, /bin, ...]
|
|
||||||
For DPCPP, the compiler must be on path
|
|
||||||
endef
|
|
||||||
$(info $(compiler_help))
|
|
||||||
COMPILER=HIPSYCL
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifndef TARGET
|
|
||||||
define target_help
|
|
||||||
Set TARGET to change device (defaulting to CPU).
|
|
||||||
Available targets are:
|
|
||||||
CPU AMD NVIDIA
|
|
||||||
|
|
||||||
endef
|
|
||||||
$(info $(target_help))
|
|
||||||
TARGET=CPU
|
|
||||||
endif
|
|
||||||
|
|
||||||
|
|
||||||
ifndef ARCH
|
|
||||||
define arch_help
|
|
||||||
Set ARCH to change device (defaulting to "").
|
|
||||||
(GPU *only*) Available targets for HIPSYCL are:
|
|
||||||
For CUDA, the architecture has the form sm_XX, e.g. sm_60 for Pascal.
|
|
||||||
For ROCm, the architecture has the form gfxYYY, e.g. gfx900 for Vega 10, gfx906 for Vega 20.
|
|
||||||
|
|
||||||
endef
|
|
||||||
|
|
||||||
ifeq ($(COMPILER), HIPSYCL)
|
|
||||||
ifneq ($(TARGET), CPU)
|
|
||||||
$(info $(arch_help))
|
|
||||||
ARCH=
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
endif
|
|
||||||
|
|
||||||
SYCL_COMPUTECPP_SYCLFLAGS = $(shell $(SYCL_SDK_DIR)/bin/computecpp_info --dump-device-compiler-flags) -no-serial-memop -sycl-driver
|
|
||||||
SYCL_COMPUTECPP_SYCLFLAGS_CPU = $(SYCL_COMPUTECPP_SYCLFLAGS)
|
|
||||||
SYCL_COMPUTECPP_SYCLFLAGS_AMD = $(SYCL_COMPUTECPP_SYCLFLAGS)
|
|
||||||
SYCL_COMPUTECPP_SYCLFLAGS_NVIDIA = $(SYCL_COMPUTECPP_SYCLFLAGS) -sycl-target ptx64
|
|
||||||
SYCL_COMPUTECPP_SYCLCXX = $(SYCL_SDK_DIR)/bin/compute++
|
|
||||||
SYCL_COMPUTECPP_FLAGS = -O3 -std=c++17
|
|
||||||
SYCL_COMPUTECPP_LINK_FLAGS = -Wl,-rpath=$(SYCL_SDK_DIR)/lib/ $(SYCL_SDK_DIR)/lib/libComputeCpp.so -lOpenCL
|
|
||||||
SYCL_COMPUTECPP_INCLUDE = -I$(SYCL_SDK_DIR)/include
|
|
||||||
|
|
||||||
SYCL_HIPSYCL_SYCLFLAGS_CPU = --hipsycl-platform=cpu
|
|
||||||
SYCL_HIPSYCL_SYCLFLAGS_AMD = --hipsycl-platform=rocm --hipsycl-gpu-arch=$(ARCH)
|
|
||||||
SYCL_HIPSYCL_SYCLFLAGS_NVIDIA = --hipsycl-platform=cuda --hipsycl-gpu-arch=$(ARCH)
|
|
||||||
SYCL_HIPSYCL_SYCLCXX = $(SYCL_SDK_DIR)/bin/syclcc
|
|
||||||
SYCL_HIPSYCL_FLAGS = -O3 --std=c++17
|
|
||||||
SYCL_HIPSYCL_LINK_FLAGS = -L$(SYCL_SDK_DIR)/lib -Wl,-rpath,$(SYCL_SDK_DIR)/lib
|
|
||||||
SYCL_HIPSYCL_INCLUDE =
|
|
||||||
|
|
||||||
SYCL_DPCPP_SYCLFLAGS_NVIDIA = -fsycl -fsycl-targets=nvptx64-nvidia-cuda-sycldevice -fsycl-unnamed-lambda
|
|
||||||
SYCL_DPCPP_SYCLCXX = dpcpp
|
|
||||||
SYCL_DPCPP_FLAGS = -O3 --std=c++17
|
|
||||||
SYCL_DPCPP_LINK_FLAGS =
|
|
||||||
SYCL_DPCPP_INCLUDE =
|
|
||||||
|
|
||||||
|
|
||||||
SYCL_SYCLFLAGS = $(SYCL_$(COMPILER)_SYCLFLAGS_$(TARGET))
|
|
||||||
SYCL_SYCLCXX = $(SYCL_$(COMPILER)_SYCLCXX)
|
|
||||||
SYCL_FLAGS = $(SYCL_$(COMPILER)_FLAGS)
|
|
||||||
SYCL_LINK_FLAGS = $(SYCL_$(COMPILER)_LINK_FLAGS)
|
|
||||||
SYCL_INCLUDE = $(SYCL_$(COMPILER)_INCLUDE)
|
|
||||||
|
|
||||||
# only ComputeCpp generates .sycl files which is a bit odd to deal with so we opted to compile everything together
|
|
||||||
sycl-stream: main.cpp SYCLStream.cpp
|
|
||||||
$(SYCL_SYCLCXX) $(SYCL_SYCLFLAGS) $(SYCL_FLAGS) $(SYCL_INCLUDE) -DSYCL $(EXTRA_FLAGS) $(SYCL_LINK_FLAGS) $^ -o $@
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean:
|
|
||||||
rm -f sycl-stream
|
|
||||||
56
TBB.make
56
TBB.make
@ -1,56 +0,0 @@
|
|||||||
|
|
||||||
ifndef COMPILER
|
|
||||||
define compiler_help
|
|
||||||
Set COMPILER to change flags (defaulting to GNU).
|
|
||||||
Available compilers are:
|
|
||||||
GNU INTEL INTEL_LEGACY
|
|
||||||
|
|
||||||
endef
|
|
||||||
$(info $(compiler_help))
|
|
||||||
COMPILER=GNU
|
|
||||||
endif
|
|
||||||
|
|
||||||
|
|
||||||
CXX_GNU = g++
|
|
||||||
CXX_INTEL = icpx
|
|
||||||
CXX_INTEL_LEGACY = icpc
|
|
||||||
CXX = $(COMPILER_$(COMPILER))
|
|
||||||
|
|
||||||
CXXFLAGS_GNU = -march=native
|
|
||||||
CXXFLAGS_INTEL = -march=native
|
|
||||||
CXXFLAGS_INTEL_LEGACY = -qopt-streaming-stores=always
|
|
||||||
|
|
||||||
CXX = $(CXX_$(COMPILER))
|
|
||||||
CXXFLAGS = -std=c++11 -O3 $(CXXFLAGS_$(COMPILER))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
ifndef PARTITIONER
|
|
||||||
define partitioner_help
|
|
||||||
Set PARTITIONER to select TBB's partitioner.
|
|
||||||
Partitioner specifies how a loop template should partition its work among threads.
|
|
||||||
|
|
||||||
Available options:
|
|
||||||
AUTO - Optimize range subdivision based on work-stealing events.
|
|
||||||
AFFINITY - Proportional splitting that optimizes for cache affinity.
|
|
||||||
STATIC - Distribute work uniformly with no additional load balancing.
|
|
||||||
SIMPLE - Recursively split its range until it cannot be further subdivided.
|
|
||||||
|
|
||||||
See https://spec.oneapi.com/versions/latest/elements/oneTBB/source/algorithms.html#partitioners
|
|
||||||
for more details.
|
|
||||||
|
|
||||||
endef
|
|
||||||
$(info $(partitioner_help))
|
|
||||||
PARTITIONER=AUTO
|
|
||||||
endif
|
|
||||||
|
|
||||||
PARTITIONER_MODE = -DPARTITIONER_$(PARTITIONER)
|
|
||||||
|
|
||||||
|
|
||||||
tbb-stream: main.cpp TBBStream.cpp
|
|
||||||
$(CXX) -DTBB $(PARTITIONER_MODE) $(CXXFLAGS) $^ $(EXTRA_FLAGS) -I$(TBB_DIR)/include -Wl,-rpath,$(TBB_DIR)/lib/intel64/gcc4.8 $(TBB_DIR)/lib/intel64/gcc4.8/libtbb.so -o $@
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean:
|
|
||||||
rm -f tbb-stream
|
|
||||||
|
|
||||||
@ -1,21 +0,0 @@
|
|||||||
|
|
||||||
HCC = hcc
|
|
||||||
|
|
||||||
CXXFLAGS+=-O3 $(shell hcc-config --cxxflags)
|
|
||||||
LDFLAGS+=$(shell hcc-config --ldflags)
|
|
||||||
|
|
||||||
ifdef TBSIZE
|
|
||||||
CXXFLAGS+=-DVIRTUALTILESIZE=$(TBSIZE)
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifdef NTILES
|
|
||||||
CXXFLAGS+=-DNTILES=$(TBSIZE)
|
|
||||||
endif
|
|
||||||
|
|
||||||
|
|
||||||
hc-stream: ../main.cpp HCStream.cpp
|
|
||||||
$(HCC) $(CXXFLAGS) -DHC $^ $(LDFLAGS) $(EXTRA_FLAGS) -o $@
|
|
||||||
|
|
||||||
.PHONY: clean
|
|
||||||
clean:
|
|
||||||
rm -f hc-stream
|
|
||||||
29
src/.gitignore
vendored
Normal file
29
src/.gitignore
vendored
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
|
||||||
|
**/cuda-stream
|
||||||
|
**/ocl-stream
|
||||||
|
**/omp-stream
|
||||||
|
**/acc-stream
|
||||||
|
**/raja-stream
|
||||||
|
**/kokkos-stream
|
||||||
|
**/std-stream
|
||||||
|
**/sycl-stream
|
||||||
|
**/hip-stream
|
||||||
|
|
||||||
|
**/*.o
|
||||||
|
**/*.bc
|
||||||
|
**/*.sycl
|
||||||
|
**/*.tar
|
||||||
|
**/*.gz
|
||||||
|
**/*.a
|
||||||
|
|
||||||
|
**/KokkosCore_Config_*
|
||||||
|
|
||||||
|
**/.DS_Store
|
||||||
|
|
||||||
|
|
||||||
|
build/
|
||||||
|
cmake-build-*/
|
||||||
|
CMakeFiles/
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
|
.directory
|
||||||
@ -7,7 +7,21 @@ project(BabelStream VERSION 3.5 LANGUAGES CXX)
|
|||||||
# some nicer defaults for standard C++
|
# some nicer defaults for standard C++
|
||||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
|
include(FetchContent)
|
||||||
|
|
||||||
|
FetchContent_Declare(
|
||||||
|
opencl_header
|
||||||
|
URL https://github.com/KhronosGroup/OpenCL-Headers/archive/refs/tags/v2021.06.30.zip
|
||||||
|
URL_HASH MD5=af7ab7918a6a11c60370c8651a9f0192
|
||||||
|
)
|
||||||
|
|
||||||
|
macro(setup_opencl_header_includes)
|
||||||
|
FetchContent_GetProperties(opencl_header)
|
||||||
|
if (NOT opencl_header_POPULATED)
|
||||||
|
FetchContent_Populate(opencl_header)
|
||||||
|
set(OpenCL_INCLUDE_DIR ${opencl_header_SOURCE_DIR})
|
||||||
|
endif ()
|
||||||
|
endmacro()
|
||||||
|
|
||||||
#set(MODEL SYCL)
|
#set(MODEL SYCL)
|
||||||
#set(SYCL_COMPILER COMPUTECPP)
|
#set(SYCL_COMPILER COMPUTECPP)
|
||||||
@ -101,19 +115,19 @@ endif ()
|
|||||||
include(register_models.cmake)
|
include(register_models.cmake)
|
||||||
|
|
||||||
# register out models <model_name> <preprocessor_def_name> <source files...>
|
# register out models <model_name> <preprocessor_def_name> <source files...>
|
||||||
register_model(OMP OMP OMPStream.cpp)
|
register_model(omp OMP OMPStream.cpp)
|
||||||
register_model(OCL OCL OCLStream.cpp)
|
register_model(ocl OCL OCLStream.cpp)
|
||||||
register_model(STD STD STDStream.cpp)
|
register_model(std STD STDStream.cpp)
|
||||||
register_model(STD20 STD20 STD20Stream.cpp)
|
register_model(std20 STD20 STD20Stream.cpp)
|
||||||
register_model(HIP HIP HIPStream.cpp)
|
register_model(hip HIP HIPStream.cpp)
|
||||||
register_model(CUDA CUDA CUDAStream.cu)
|
register_model(cuda CUDA CUDAStream.cu)
|
||||||
register_model(KOKKOS KOKKOS KokkosStream.cpp)
|
register_model(kokkos KOKKOS KokkosStream.cpp)
|
||||||
register_model(SYCL SYCL SYCLStream.cpp)
|
register_model(sycl SYCL SYCLStream.cpp)
|
||||||
register_model(ACC ACC ACCStream.cpp)
|
register_model(acc ACC ACCStream.cpp)
|
||||||
# defining RAJA collides with the RAJA namespace so USE_RAJA
|
# defining RAJA collides with the RAJA namespace so USE_RAJA
|
||||||
register_model(RAJA USE_RAJA RAJAStream.cpp)
|
register_model(raja USE_RAJA RAJAStream.cpp)
|
||||||
register_model(TBB TBB TBBStream.cpp)
|
register_model(tbb TBB TBBStream.cpp)
|
||||||
register_model(THRUST THRUST ThrustStream.cu) # Thrust uses cu, even for rocThrust
|
register_model(thrust THRUST ThrustStream.cu) # Thrust uses cu, even for rocThrust
|
||||||
|
|
||||||
|
|
||||||
set(USAGE ON CACHE BOOL "Whether to print all custom flags for the selected model")
|
set(USAGE ON CACHE BOOL "Whether to print all custom flags for the selected model")
|
||||||
@ -149,7 +163,7 @@ message(STATUS "Default ${CMAKE_BUILD_TYPE} flags are `${DEFAULT_${BUILD_TYPE}_F
|
|||||||
# setup common build flag defaults if there are no overrides
|
# setup common build flag defaults if there are no overrides
|
||||||
if (NOT DEFINED ${BUILD_TYPE}_FLAGS)
|
if (NOT DEFINED ${BUILD_TYPE}_FLAGS)
|
||||||
set(ACTUAL_${BUILD_TYPE}_FLAGS ${DEFAULT_${BUILD_TYPE}_FLAGS})
|
set(ACTUAL_${BUILD_TYPE}_FLAGS ${DEFAULT_${BUILD_TYPE}_FLAGS})
|
||||||
elseif()
|
elseif ()
|
||||||
set(ACTUAL_${BUILD_TYPE}_FLAGS ${${BUILD_TYPE}_FLAGS})
|
set(ACTUAL_${BUILD_TYPE}_FLAGS ${${BUILD_TYPE}_FLAGS})
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
@ -171,6 +185,7 @@ message(STATUS "Executable : ${EXE_NAME}")
|
|||||||
|
|
||||||
# below we have all the usual CMake target setup steps
|
# below we have all the usual CMake target setup steps
|
||||||
|
|
||||||
|
include_directories(.)
|
||||||
add_executable(${EXE_NAME} ${IMPL_SOURCES} main.cpp)
|
add_executable(${EXE_NAME} ${IMPL_SOURCES} main.cpp)
|
||||||
target_link_libraries(${EXE_NAME} PUBLIC ${LINK_LIBRARIES})
|
target_link_libraries(${EXE_NAME} PUBLIC ${LINK_LIBRARIES})
|
||||||
target_compile_definitions(${EXE_NAME} PUBLIC ${IMPL_DEFINITIONS})
|
target_compile_definitions(${EXE_NAME} PUBLIC ${IMPL_DEFINITIONS})
|
||||||
@ -186,9 +201,9 @@ target_link_options(${EXE_NAME} PUBLIC LINKER:${CXX_EXTRA_LINKER_FLAGS})
|
|||||||
target_link_options(${EXE_NAME} PUBLIC ${LINK_FLAGS} ${CXX_EXTRA_LINK_FLAGS})
|
target_link_options(${EXE_NAME} PUBLIC ${LINK_FLAGS} ${CXX_EXTRA_LINK_FLAGS})
|
||||||
|
|
||||||
# some models require the target to be already specified so they can finish their setup here
|
# some models require the target to be already specified so they can finish their setup here
|
||||||
# this only happens if the MODEL.cmake definition contains the `setup_target` macro
|
# this only happens if the model.cmake definition contains the `setup_target` macro
|
||||||
if (COMMAND setup_target)
|
if (COMMAND setup_target)
|
||||||
setup_target(${EXE_NAME})
|
setup_target(${EXE_NAME})
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
install (TARGETS ${EXE_NAME} DESTINATION bin)
|
install(TARGETS ${EXE_NAME} DESTINATION bin)
|
||||||
@ -225,10 +225,7 @@ setup_tbb() {
|
|||||||
|
|
||||||
setup_clang_gcc() {
|
setup_clang_gcc() {
|
||||||
|
|
||||||
echo "deb http://archive.ubuntu.com/ubuntu focal main universe" | sudo tee -a /etc/apt/sources.list
|
sudo apt-get install -y -qq gcc-10-offload-nvptx gcc-10-offload-amdgcn libtbb2 libtbb-dev g++-10 clang libomp-dev
|
||||||
|
|
||||||
sudo apt-get update -qq
|
|
||||||
sudo apt-get install -y -qq gcc-10-offload-nvptx gcc-10-offload-amdgcn libtbb2 libtbb-dev g++-10
|
|
||||||
|
|
||||||
export_var GCC_CXX "$(which g++-10)"
|
export_var GCC_CXX "$(which g++-10)"
|
||||||
verify_bin_exists "$GCC_CXX"
|
verify_bin_exists "$GCC_CXX"
|
||||||
@ -251,9 +248,6 @@ setup_clang_gcc() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
setup_rocm() {
|
setup_rocm() {
|
||||||
wget -q -O - "https://repo.radeon.com/rocm/rocm.gpg.key" | sudo apt-key add -
|
|
||||||
echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/4.5 ubuntu main' | sudo tee /etc/apt/sources.list.d/rocm.list
|
|
||||||
sudo apt-get update -qq
|
|
||||||
sudo apt-get install -y -qq rocm-dev rocthrust-dev
|
sudo apt-get install -y -qq rocm-dev rocthrust-dev
|
||||||
export_var ROCM_PATH "/opt/rocm"
|
export_var ROCM_PATH "/opt/rocm"
|
||||||
export_var PATH "$ROCM_PATH/bin:$PATH" # ROCm needs this for many of their libraries' CMake build to work
|
export_var PATH "$ROCM_PATH/bin:$PATH" # ROCm needs this for many of their libraries' CMake build to work
|
||||||
@ -320,9 +314,21 @@ if [ "${GITHUB_ACTIONS:-false}" = true ]; then
|
|||||||
echo "Running in GitHub Actions, defaulting to special export"
|
echo "Running in GitHub Actions, defaulting to special export"
|
||||||
TERM=xterm
|
TERM=xterm
|
||||||
export TERM=xterm
|
export TERM=xterm
|
||||||
|
|
||||||
|
# drop the lock in case we got one from a failed run
|
||||||
|
rm /var/lib/dpkg/lock-frontend || true
|
||||||
|
rm /var/cache/apt/archives/lock || true
|
||||||
|
|
||||||
|
wget -q -O - "https://repo.radeon.com/rocm/rocm.gpg.key" | sudo apt-key add -
|
||||||
|
echo "deb http://archive.ubuntu.com/ubuntu focal main universe" | sudo tee -a /etc/apt/sources.list
|
||||||
|
echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/4.5 ubuntu main' | sudo tee /etc/apt/sources.list.d/rocm.list
|
||||||
|
|
||||||
|
sudo apt-get update -qq
|
||||||
|
sudo apt-get install -y -qq cmake
|
||||||
|
|
||||||
if [ "$SETUP" = true ]; then
|
if [ "$SETUP" = true ]; then
|
||||||
echo "Deleting extra packages for space in 5 seconds..."
|
echo "Deleting extra packages for space in 2 seconds..."
|
||||||
sleep 5
|
sleep 2
|
||||||
echo "Starting apt-get remove:"
|
echo "Starting apt-get remove:"
|
||||||
sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel
|
sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel
|
||||||
sudo apt-get autoremove -y
|
sudo apt-get autoremove -y
|
||||||
@ -110,9 +110,7 @@ run_build() {
|
|||||||
# HIPSYCL_DIR="/opt/hipsycl/cff515c/"
|
# HIPSYCL_DIR="/opt/hipsycl/cff515c/"
|
||||||
|
|
||||||
# ICPX_CXX="/opt/intel/oneapi/compiler/2021.1.2/linux/bin/icpx"
|
# ICPX_CXX="/opt/intel/oneapi/compiler/2021.1.2/linux/bin/icpx"
|
||||||
# ICPC_CXX="/opt/intel/oneapi/compiler/2021.1.2/linux/bin/intel64/icpc"
|
# ICPC_CXX="/opt/intel/oneapi/compiler/2021.1.2/linux/bin/intel64/icpc"# TBB_LIB="/home/tom/Downloads/oneapi-tbb-2021.1.1/"
|
||||||
|
|
||||||
# TBB_LIB="/home/tom/Downloads/oneapi-tbb-2021.1.1/"
|
|
||||||
|
|
||||||
# GCC_STD_PAR_LIB="tbb"
|
# GCC_STD_PAR_LIB="tbb"
|
||||||
# CLANG_STD_PAR_LIB="tbb"
|
# CLANG_STD_PAR_LIB="tbb"
|
||||||
@ -130,42 +128,42 @@ build_gcc() {
|
|||||||
local name="gcc_build"
|
local name="gcc_build"
|
||||||
local cxx="-DCMAKE_CXX_COMPILER=${GCC_CXX:?}"
|
local cxx="-DCMAKE_CXX_COMPILER=${GCC_CXX:?}"
|
||||||
|
|
||||||
run_build $name "${GCC_CXX:?}" OMP "$cxx"
|
run_build $name "${GCC_CXX:?}" omp "$cxx"
|
||||||
if [ "$MODEL" = "all" ] || [ "$MODEL" = "OMP" ]; then
|
if [ "$MODEL" = "all" ] || [ "$MODEL" = "OMP" ]; then
|
||||||
# sanity check that it at least runs
|
# sanity check that it at least runs
|
||||||
echo "Sanity checking GCC OMP build..."
|
echo "Sanity checking GCC omp build..."
|
||||||
"./$BUILD_DIR/OMP_$name/omp-stream" -s 1048576 -n 10
|
"./$BUILD_DIR/omp_$name/omp-stream" -s 1048576 -n 10
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# some distributions like Ubuntu bionic implements std par with TBB, so conditionally link it here
|
# some distributions like Ubuntu bionic implements std par with TBB, so conditionally link it here
|
||||||
run_build $name "${GCC_CXX:?}" STD "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}"
|
run_build $name "${GCC_CXX:?}" std "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}"
|
||||||
run_build $name "${GCC_CXX:?}" STD20 "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}"
|
run_build $name "${GCC_CXX:?}" std20 "$cxx -DCXX_EXTRA_LIBRARIES=${GCC_STD_PAR_LIB:-}"
|
||||||
|
|
||||||
run_build $name "${GCC_CXX:?}" TBB "$cxx -DONE_TBB_DIR=$TBB_LIB"
|
run_build $name "${GCC_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB"
|
||||||
run_build $name "${GCC_CXX:?}" TBB "$cxx" # build TBB again with the system TBB
|
run_build $name "${GCC_CXX:?}" tbb "$cxx" # build TBB again with the system TBB
|
||||||
|
|
||||||
if [ "${GCC_OMP_OFFLOAD_AMD:-false}" != "false" ]; then
|
if [ "${GCC_OMP_OFFLOAD_AMD:-false}" != "false" ]; then
|
||||||
run_build "amd_$name" "${GCC_CXX:?}" ACC "$cxx -DCXX_EXTRA_FLAGS=-foffload=amdgcn-amdhsa"
|
run_build "amd_$name" "${GCC_CXX:?}" acc "$cxx -DCXX_EXTRA_FLAGS=-foffload=amdgcn-amdhsa"
|
||||||
run_build "amd_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=AMD:$AMD_ARCH"
|
run_build "amd_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=AMD:$AMD_ARCH"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "${GCC_OMP_OFFLOAD_NVIDIA:-false}" != "false" ]; then
|
if [ "${GCC_OMP_OFFLOAD_NVIDIA:-false}" != "false" ]; then
|
||||||
run_build "nvidia_$name" "${GCC_CXX:?}" ACC "$cxx -DCXX_EXTRA_FLAGS=-foffload=nvptx-none"
|
run_build "nvidia_$name" "${GCC_CXX:?}" acc "$cxx -DCXX_EXTRA_FLAGS=-foffload=nvptx-none"
|
||||||
run_build "nvidia_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH"
|
run_build "nvidia_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
run_build $name "${GCC_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH"
|
run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH"
|
||||||
run_build $name "${GCC_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED"
|
run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED"
|
||||||
run_build $name "${GCC_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT"
|
run_build $name "${GCC_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT"
|
||||||
# run_build $name "${CC_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_CUDA=ON"
|
# run_build $name "${CC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_CUDA=ON"
|
||||||
run_build "cuda_$name" "${GCC_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
|
run_build "cuda_$name" "${GCC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
|
||||||
run_build $name "${GCC_CXX:?}" OCL "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
|
run_build $name "${GCC_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
|
||||||
run_build $name "${GCC_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}"
|
run_build $name "${GCC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}"
|
||||||
|
|
||||||
# FIXME fails due to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100102
|
# FIXME fails due to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100102
|
||||||
# FIXME we also got https://github.com/NVIDIA/nccl/issues/494
|
# FIXME we also got https://github.com/NVIDIA/nccl/issues/494
|
||||||
|
|
||||||
# run_build "cuda_$name" "${GCC_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} \
|
# run_build "cuda_$name" "${GCC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?} \
|
||||||
# -DENABLE_CUDA=ON \
|
# -DENABLE_CUDA=ON \
|
||||||
# -DTARGET=NVIDIA \
|
# -DTARGET=NVIDIA \
|
||||||
# -DCUDA_TOOLKIT_ROOT_DIR=${NVHPC_CUDA_DIR:?} \
|
# -DCUDA_TOOLKIT_ROOT_DIR=${NVHPC_CUDA_DIR:?} \
|
||||||
@ -176,9 +174,9 @@ build_gcc() {
|
|||||||
local current=$("$CMAKE_BIN" --version | head -n 1 | cut -d ' ' -f3)
|
local current=$("$CMAKE_BIN" --version | head -n 1 | cut -d ' ' -f3)
|
||||||
local required="3.15.0"
|
local required="3.15.0"
|
||||||
if [ "$(printf '%s\n' "$required" "$current" | sort -V | head -n1)" = "$required" ]; then
|
if [ "$(printf '%s\n' "$required" "$current" | sort -V | head -n1)" = "$required" ]; then
|
||||||
run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=CUDA"
|
run_build $name "${GCC_CXX:?}" thrust "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=CUDA"
|
||||||
run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=OMP"
|
run_build $name "${GCC_CXX:?}" thrust "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=OMP"
|
||||||
run_build $name "${GCC_CXX:?}" THRUST "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=CPP"
|
run_build $name "${GCC_CXX:?}" thrust "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DSDK_DIR=$NVHPC_CUDA_DIR/include -DTHRUST_IMPL=CUDA -DBACKEND=CPP"
|
||||||
|
|
||||||
# FIXME CUDA Thrust + TBB throws the following error:
|
# FIXME CUDA Thrust + TBB throws the following error:
|
||||||
# /usr/lib/gcc/x86_64-linux-gnu/9/include/avx512fintrin.h(9146): error: identifier "__builtin_ia32_rndscaless_round" is undefined
|
# /usr/lib/gcc/x86_64-linux-gnu/9/include/avx512fintrin.h(9146): error: identifier "__builtin_ia32_rndscaless_round" is undefined
|
||||||
@ -198,54 +196,62 @@ build_gcc() {
|
|||||||
build_clang() {
|
build_clang() {
|
||||||
local name="clang_build"
|
local name="clang_build"
|
||||||
local cxx="-DCMAKE_CXX_COMPILER=${CLANG_CXX:?}"
|
local cxx="-DCMAKE_CXX_COMPILER=${CLANG_CXX:?}"
|
||||||
run_build $name "${CLANG_CXX:?}" OMP "$cxx"
|
run_build $name "${CLANG_CXX:?}" omp "$cxx"
|
||||||
|
|
||||||
if [ "${CLANG_OMP_OFFLOAD_AMD:-false}" != "false" ]; then
|
if [ "${CLANG_OMP_OFFLOAD_AMD:-false}" != "false" ]; then
|
||||||
run_build "amd_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=AMD:$AMD_ARCH"
|
run_build "amd_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=AMD:$AMD_ARCH"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "${CLANG_OMP_OFFLOAD_NVIDIA:-false}" != "false" ]; then
|
if [ "${CLANG_OMP_OFFLOAD_NVIDIA:-false}" != "false" ]; then
|
||||||
run_build "nvidia_$name" "${GCC_CXX:?}" OMP "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH"
|
run_build "nvidia_$name" "${GCC_CXX:?}" omp "$cxx -DOFFLOAD=NVIDIA:$NV_ARCH"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
run_build $name "${CLANG_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH"
|
run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH"
|
||||||
run_build $name "${CLANG_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED"
|
run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED"
|
||||||
run_build $name "${CLANG_CXX:?}" CUDA "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT"
|
run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT"
|
||||||
run_build $name "${CLANG_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
|
run_build $name "${CLANG_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
|
||||||
run_build $name "${CLANG_CXX:?}" OCL "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
|
run_build $name "${CLANG_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
|
||||||
run_build $name "${CLANG_CXX:?}" STD "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}"
|
run_build $name "${CLANG_CXX:?}" std "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}"
|
||||||
# run_build $name "${LANG_CXX:?}" STD20 "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" # not yet supported
|
# run_build $name "${LANG_CXX:?}" std20 "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" # not yet supported
|
||||||
|
run_build $name "${CLANG_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}"
|
||||||
|
run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH"
|
||||||
|
run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=MANAGED"
|
||||||
|
run_build $name "${CLANG_CXX:?}" cuda "$cxx -DCMAKE_CUDA_COMPILER=${NVHPC_NVCC:?} -DCUDA_ARCH=$NV_ARCH -DMEM=PAGEFAULT"
|
||||||
|
run_build $name "${CLANG_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
|
||||||
|
run_build $name "${CLANG_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
|
||||||
|
run_build $name "${CLANG_CXX:?}" std "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}"
|
||||||
|
# run_build $name "${LANG_CXX:?}" std20 "$cxx -DCXX_EXTRA_LIBRARIES=${CLANG_STD_PAR_LIB:-}" # not yet supported
|
||||||
|
|
||||||
run_build $name "${CLANG_CXX:?}" TBB "$cxx -DONE_TBB_DIR=$TBB_LIB"
|
run_build $name "${CLANG_CXX:?}" tbb "$cxx -DONE_TBB_DIR=$TBB_LIB"
|
||||||
run_build $name "${CLANG_CXX:?}" TBB "$cxx" # build TBB again with the system TBB
|
run_build $name "${CLANG_CXX:?}" tbb "$cxx" # build TBB again with the system TBB
|
||||||
|
|
||||||
run_build $name "${CLANG_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}"
|
run_build $name "${CLANG_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}"
|
||||||
# no clang /w RAJA+cuda because it needs nvcc which needs gcc
|
# no clang /w RAJA+cuda because it needs nvcc which needs gcc
|
||||||
}
|
}
|
||||||
|
|
||||||
build_nvhpc() {
|
build_nvhpc() {
|
||||||
local name="nvhpc_build"
|
local name="nvhpc_build"
|
||||||
local cxx="-DCMAKE_CXX_COMPILER=${NVHPC_NVCXX:?}"
|
local cxx="-DCMAKE_CXX_COMPILER=${NVHPC_NVCXX:?}"
|
||||||
run_build $name "${NVHPC_NVCXX:?}" STD "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY"
|
run_build $name "${NVHPC_NVCXX:?}" std "$cxx -DNVHPC_OFFLOAD=$NV_ARCH_CCXY"
|
||||||
run_build $name "${NVHPC_NVCXX:?}" ACC "$cxx -DTARGET_DEVICE=gpu -DTARGET_PROCESSOR=px -DCUDA_ARCH=$NV_ARCH_CCXY"
|
run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=gpu -DTARGET_PROCESSOR=px -DCUDA_ARCH=$NV_ARCH_CCXY"
|
||||||
run_build $name "${NVHPC_NVCXX:?}" ACC "$cxx -DTARGET_DEVICE=multicore -DTARGET_PROCESSOR=zen"
|
run_build $name "${NVHPC_NVCXX:?}" acc "$cxx -DTARGET_DEVICE=multicore -DTARGET_PROCESSOR=zen"
|
||||||
}
|
}
|
||||||
|
|
||||||
build_aocc() {
|
build_aocc() {
|
||||||
run_build aocc_build "${AOCC_CXX:?}" OMP "-DCMAKE_CXX_COMPILER=${AOCC_CXX:?}"
|
run_build aocc_build "${AOCC_CXX:?}" omp "-DCMAKE_CXX_COMPILER=${AOCC_CXX:?}"
|
||||||
}
|
}
|
||||||
|
|
||||||
build_aomp() {
|
build_aomp() {
|
||||||
run_build aomp_amd_build "${AOMP_CXX:?}" OMP "-DCMAKE_CXX_COMPILER=${AOMP_CXX:?} -DOFFLOAD=AMD:gfx906"
|
run_build aomp_amd_build "${AOMP_CXX:?}" omp "-DCMAKE_CXX_COMPILER=${AOMP_CXX:?} -DOFFLOAD=AMD:gfx906"
|
||||||
#run_build aomp_nvidia_build "-DCMAKE_CXX_COMPILER=${AOMP_CXX:?} -DOFFLOAD=NVIDIA:$NV_ARCH"
|
#run_build aomp_nvidia_build "-DCMAKE_CXX_COMPILER=${AOMP_CXX:?} -DOFFLOAD=NVIDIA:$NV_ARCH"
|
||||||
}
|
}
|
||||||
|
|
||||||
build_hip() {
|
build_hip() {
|
||||||
local name="hip_build"
|
local name="hip_build"
|
||||||
|
|
||||||
run_build $name "${HIP_CXX:?}" HIP "-DCMAKE_CXX_COMPILER=${HIP_CXX:?}"
|
run_build $name "${HIP_CXX:?}" hip "-DCMAKE_CXX_COMPILER=${HIP_CXX:?}"
|
||||||
|
|
||||||
run_build $name "${GCC_CXX:?}" THRUST "-DCMAKE_CXX_COMPILER=${HIP_CXX:?} -DSDK_DIR=$ROCM_PATH -DTHRUST_IMPL=ROCM"
|
run_build $name "${GCC_CXX:?}" thrust "-DCMAKE_CXX_COMPILER=${HIP_CXX:?} -DSDK_DIR=$ROCM_PATH -DTHRUST_IMPL=ROCM"
|
||||||
}
|
}
|
||||||
|
|
||||||
build_icpx() {
|
build_icpx() {
|
||||||
@ -253,7 +259,7 @@ build_icpx() {
|
|||||||
set +u
|
set +u
|
||||||
source /opt/intel/oneapi/setvars.sh -force || true
|
source /opt/intel/oneapi/setvars.sh -force || true
|
||||||
set -u
|
set -u
|
||||||
run_build intel_build "${ICPX_CXX:?}" OMP "-DCMAKE_CXX_COMPILER=${ICPX_CXX:?} -DOFFLOAD=INTEL"
|
run_build intel_build "${ICPX_CXX:?}" omp "-DCMAKE_CXX_COMPILER=${ICPX_CXX:?} -DOFFLOAD=INTEL"
|
||||||
}
|
}
|
||||||
|
|
||||||
build_icpc() {
|
build_icpc() {
|
||||||
@ -263,31 +269,31 @@ build_icpc() {
|
|||||||
set -u
|
set -u
|
||||||
local name="intel_build"
|
local name="intel_build"
|
||||||
local cxx="-DCMAKE_CXX_COMPILER=${ICPC_CXX:?}"
|
local cxx="-DCMAKE_CXX_COMPILER=${ICPC_CXX:?}"
|
||||||
run_build $name "${ICPC_CXX:?}" OMP "$cxx"
|
run_build $name "${ICPC_CXX:?}" omp "$cxx"
|
||||||
run_build $name "${ICPC_CXX:?}" OCL "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
|
run_build $name "${ICPC_CXX:?}" ocl "$cxx -DOpenCL_LIBRARY=${OCL_LIB:?}"
|
||||||
run_build $name "${ICPC_CXX:?}" RAJA "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}"
|
run_build $name "${ICPC_CXX:?}" raja "$cxx -DRAJA_IN_TREE=${RAJA_SRC:?}"
|
||||||
run_build $name "${ICPC_CXX:?}" KOKKOS "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
|
run_build $name "${ICPC_CXX:?}" kokkos "$cxx -DKOKKOS_IN_TREE=${KOKKOS_SRC:?} -DKokkos_ENABLE_OPENMP=ON"
|
||||||
}
|
}
|
||||||
|
|
||||||
build_computecpp() {
|
build_computecpp() {
|
||||||
run_build computecpp_build "compute++" SYCL "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \
|
run_build computecpp_build "compute++" sycl "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \
|
||||||
-DSYCL_COMPILER=COMPUTECPP \
|
-DSYCL_COMPILER=COMPUTECPP \
|
||||||
-DSYCL_COMPILER_DIR=${COMPUTECPP_DIR:?} \
|
-DSYCL_COMPILER_DIR=${COMPUTECPP_DIR:?} \
|
||||||
-DOpenCL_LIBRARY=${OCL_LIB:?}"
|
-DOpenCL_LIBRARY=${OCL_LIB:?}"
|
||||||
}
|
}
|
||||||
|
|
||||||
build_dpcpp() {
|
build_dpcpp() {
|
||||||
run_build intel_build "${DPCPP_DIR:?}" SYCL "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \
|
run_build intel_build "${DPCPP_DIR:?}" sycl "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} \
|
||||||
-DSYCL_COMPILER=DPCPP \
|
-DSYCL_COMPILER=DPCPP \
|
||||||
-DSYCL_COMPILER_DIR=${DPCPP_DIR:?}"
|
-DSYCL_COMPILER_DIR=${DPCPP_DIR:?}"
|
||||||
|
|
||||||
# for oneAPI BaseKit:
|
# for oneAPI BaseKit:
|
||||||
# source /opt/intel/oneapi/setvars.sh -force
|
# source /opt/intel/oneapi/setvars.sh -force
|
||||||
# run_build intel_build "dpcpp" SYCL "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} -DSYCL_COMPILER=ONEAPI-DPCPP"
|
# run_build intel_build "dpcpp" sycl "-DCMAKE_CXX_COMPILER=${GCC_CXX:?} -DSYCL_COMPILER=ONEAPI-DPCPP"
|
||||||
}
|
}
|
||||||
|
|
||||||
build_hipsycl() {
|
build_hipsycl() {
|
||||||
run_build hipsycl_build "syclcc" SYCL "
|
run_build hipsycl_build "syclcc" sycl "
|
||||||
-DSYCL_COMPILER=HIPSYCL \
|
-DSYCL_COMPILER=HIPSYCL \
|
||||||
-DSYCL_COMPILER_DIR=${HIPSYCL_DIR:?}"
|
-DSYCL_COMPILER_DIR=${HIPSYCL_DIR:?}"
|
||||||
}
|
}
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user