Update cl2.hpp

This commit is contained in:
Tom Deakin 2016-05-03 11:41:00 +01:00
parent 95f9efb7d9
commit 83516ae352

View File

@ -52,6 +52,18 @@
* #define CL_HPP_USE_DX_INTEROP
* cl_khr_sub_groups
* #define CL_HPP_USE_CL_SUB_GROUPS_KHR
*
* Doxygen documentation for this header is available here:
*
* http://khronosgroup.github.io/OpenCL-CLHPP/
*
* The latest version of this header can be found on the GitHub releases page:
*
* https://github.com/KhronosGroup/OpenCL-CLHPP/releases
*
* Bugs and patches can be submitted to the GitHub repository:
*
* https://github.com/KhronosGroup/OpenCL-CLHPP
*/
/*! \mainpage
@ -134,41 +146,64 @@
*
* \section parameterization Parameters
* This header may be parameterized by a set of preprocessor macros.
* CL_HPP_TARGET_OPENCL_VERSION
* - Defines the target OpenCL runtime version to build the header against.
* Defaults to 200, representing OpenCL 2.0.
* CL_HPP_NO_STD_STRING
* - Do not use the standard library string class.
* cl::string is not defined and may be defined by the user before
* cl2.hpp is included.
* CL_HPP_NO_STD_VECTOR
* - Do not use the standard library vector class.
* cl::vector is not defined and may be defined by the user before
* cl2.hpp is included.
* CL_HPP_NO_STD_ARRAY
* - Do not use the standard library array class.
* cl::array is not defined and may be defined by the user before
* cl2.hpp is included.
* CL_HPP_NO_STD_UNIQUE_PTR
* - Do not use the standard library unique_ptr class.
* cl::pointer and the cl::allocate_pointer function are not defined
* and may be defined by the user before cl2.hpp is included.
* CL_HPP_ENABLE_DEVICE_FISSION
* - Enables device fission for OpenCL 1.2 platforms
* CL_HPP_ENABLE_EXCEPTIONS
* - Enable exceptions for use in the C++ bindings header.
* This is the preferred error handling mechanism but is not required.
* CL_HPP_ENABLE_SIZE_T_COMPATIBILITY
* - Backward compatibility option to support cl.hpp-style size_t class.
* Replaces the updated std::array derived version and removal of size_t
* from the namespace. Note that in this case the new size_t class
* is placed in the cl::compatibility namespace and thus requires
* an additional using declaration for direct backward compatibility.
* CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY
* - Enable older vector of pairs interface for construction of programs.
* CL_HPP_CL_1_2_DEFAULT_BUILD
* - Default to OpenCL C 1.2 compilation rather than OpenCL C 2.0
* - applies to use of cl::Program construction and other program build variants.
*
* - CL_HPP_TARGET_OPENCL_VERSION
*
* Defines the target OpenCL runtime version to build the header
* against. Defaults to 200, representing OpenCL 2.0.
*
* - CL_HPP_NO_STD_STRING
*
* Do not use the standard library string class. cl::string is not
* defined and may be defined by the user before cl2.hpp is
* included.
*
* - CL_HPP_NO_STD_VECTOR
*
* Do not use the standard library vector class. cl::vector is not
* defined and may be defined by the user before cl2.hpp is
* included.
*
* - CL_HPP_NO_STD_ARRAY
*
* Do not use the standard library array class. cl::array is not
* defined and may be defined by the user before cl2.hpp is
* included.
*
* - CL_HPP_NO_STD_UNIQUE_PTR
*
* Do not use the standard library unique_ptr class. cl::pointer and
* the cl::allocate_pointer functions are not defined and may be
* defined by the user before cl2.hpp is included.
*
* - CL_HPP_ENABLE_DEVICE_FISSION
*
* Enables device fission for OpenCL 1.2 platforms.
*
* - CL_HPP_ENABLE_EXCEPTIONS
*
* Enable exceptions for use in the C++ bindings header. This is the
* preferred error handling mechanism but is not required.
*
* - CL_HPP_ENABLE_SIZE_T_COMPATIBILITY
*
* Backward compatibility option to support cl.hpp-style size_t
* class. Replaces the updated std::array-derived version and
* reverses the removal of size_t from the namespace. Note that in
* this case the new size_t class is placed in the cl::compatibility
* namespace and thus requires an additional using declaration for
* direct backward compatibility.
*
* - CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY
*
* Enable older vector of pairs interface for construction of
* programs.
*
* - CL_HPP_CL_1_2_DEFAULT_BUILD
*
* Default to OpenCL C 1.2 compilation rather than OpenCL C 2.0.
* Applies to use of cl::Program construction and other program
* build variants.
*
*
* \section example Example
@ -213,34 +248,44 @@
return -1;
}
std::string kernel1{
"global int globalA;"
"kernel void updateGlobal(){"
" globalA = 75;"
"}"};
std::string kernel2{
"typedef struct { global int *bar; } Foo; kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB, global int *output, int val, write_only pipe int outPipe, queue_t childQueue){"
" output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar);"
" write_pipe(outPipe, &val);"
" queue_t default_queue = get_default_queue(); "
" ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2); "
// Use C++11 raw string literals for kernel source code
std::string kernel1{R"CLC(
global int globalA;
kernel void updateGlobal()
{
globalA = 75;
}
)CLC"};
std::string kernel2{R"CLC(
typedef struct { global int *bar; } Foo;
kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB,
global int *output, int val, write_only pipe int outPipe, queue_t childQueue)
{
output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar);
write_pipe(outPipe, &val);
queue_t default_queue = get_default_queue();
ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2);
// Have a child kernel write into third quarter of output
" enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, "
" ^{"
" output[get_global_size(0)*2 + get_global_id(0)] = inputA[get_global_size(0)*2+get_global_id(0)] + inputB[get_global_size(0)*2+get_global_id(0)] + globalA;"
" });"
enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
^{
output[get_global_size(0)*2 + get_global_id(0)] =
inputA[get_global_size(0)*2 + get_global_id(0)] + inputB[get_global_size(0)*2 + get_global_id(0)] + globalA;
});
// Have a child kernel write into last quarter of output
" enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, "
" ^{"
" output[get_global_size(0)*3 + get_global_id(0)] = inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2;"
" });"
"}" };
enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
^{
output[get_global_size(0)*3 + get_global_id(0)] =
inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2;
});
}
)CLC"};
// New simpler string interface style
std::vector<std::string> programStrings {kernel1, kernel2};
cl::Program vectorAddProgram(
programStrings);
cl::Program vectorAddProgram(programStrings);
try {
vectorAddProgram.build("-cl-std=CL2.0");
}
@ -268,9 +313,9 @@
//////////////////
// SVM allocations
cl::pointer<int> anSVMInt = cl::allocate_svm<int, cl::SVMTraitCoarse<>>();
auto anSVMInt = cl::allocate_svm<int, cl::SVMTraitCoarse<>>();
*anSVMInt = 5;
cl::SVMAllocator<int, cl::SVMTraitCoarse<cl::SVMTraitReadOnly<>>> svmAllocReadOnly;
cl::SVMAllocator<Foo, cl::SVMTraitCoarse<cl::SVMTraitReadOnly<>>> svmAllocReadOnly;
auto fooPointer = cl::allocate_pointer<Foo>(svmAllocReadOnly);
fooPointer->bar = anSVMInt.get();
cl::SVMAllocator<int, cl::SVMTraitCoarse<>> svmAlloc;
@ -3538,7 +3583,7 @@ template <class T, class Alloc, class... Args>
cl::pointer<T, detail::Deleter<Alloc>> allocate_pointer(const Alloc &alloc_, Args&&... args)
{
Alloc alloc(alloc_);
static const size_t copies = 1;
static const size_type copies = 1;
// Ensure that creation of the management block and the
// object are dealt with separately such that we only provide a deleter
@ -6520,7 +6565,7 @@ inline cl_int cl::Program::getInfo(cl_program_info name, vector<vector<unsigned
// Resize the parameter array and constituent arrays
param->resize(numBinaries);
for (int i = 0; i < numBinaries; ++i) {
for (size_type i = 0; i < numBinaries; ++i) {
(*param)[i].resize(sizes[i]);
}
@ -7107,7 +7152,7 @@ public:
size_type buffer_slice_pitch,
size_type host_row_pitch,
size_type host_slice_pitch,
void *ptr,
const void *ptr,
const vector<Event>* events = NULL,
Event* event = NULL) const
{
@ -7255,7 +7300,7 @@ public:
const array<size_type, 3>& region,
size_type row_pitch,
size_type slice_pitch,
void* ptr,
const void* ptr,
const vector<Event>* events = NULL,
Event* event = NULL) const
{
@ -7845,7 +7890,7 @@ public:
CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int enqueueTask(
const Kernel& kernel,
const vector<Event>* events = NULL,
Event* event = NULL) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED const
Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED
{
cl_event tmp;
cl_int err = detail::errHandler(
@ -8873,7 +8918,7 @@ inline cl_int enqueueWriteBufferRect(
size_type buffer_slice_pitch,
size_type host_row_pitch,
size_type host_slice_pitch,
void *ptr,
const void *ptr,
const vector<Event>* events = NULL,
Event* event = NULL)
{
@ -8971,7 +9016,7 @@ inline cl_int enqueueWriteImage(
const array<size_type, 3>& region,
size_type row_pitch,
size_type slice_pitch,
void* ptr,
const void* ptr,
const vector<Event>* events = NULL,
Event* event = NULL)
{