From 83516ae35279d78a1833572b2a66c0e0a6a5942e Mon Sep 17 00:00:00 2001 From: Tom Deakin Date: Tue, 3 May 2016 11:41:00 +0100 Subject: [PATCH] Update cl2.hpp --- CL/cl2.hpp | 209 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 127 insertions(+), 82 deletions(-) diff --git a/CL/cl2.hpp b/CL/cl2.hpp index ad0c7c4..e0f55fe 100644 --- a/CL/cl2.hpp +++ b/CL/cl2.hpp @@ -28,11 +28,11 @@ /*! \file * - * \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33), + * \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33), * OpenCL 1.2 (rev 15) and OpenCL 2.0 (rev 29) * \author Lee Howes and Bruce Merry - * - * Derived from the OpenCL 1.x C++ bindings written by + * + * Derived from the OpenCL 1.x C++ bindings written by * Benedict R. Gaster, Laurent Morichetti and Lee Howes * With additions and fixes from: * Brian Cole, March 3rd 2010 and April 2012 @@ -52,6 +52,18 @@ * #define CL_HPP_USE_DX_INTEROP * cl_khr_sub_groups * #define CL_HPP_USE_CL_SUB_GROUPS_KHR + * + * Doxygen documentation for this header is available here: + * + * http://khronosgroup.github.io/OpenCL-CLHPP/ + * + * The latest version of this header can be found on the GitHub releases page: + * + * https://github.com/KhronosGroup/OpenCL-CLHPP/releases + * + * Bugs and patches can be submitted to the GitHub repository: + * + * https://github.com/KhronosGroup/OpenCL-CLHPP */ /*! \mainpage @@ -134,41 +146,64 @@ * * \section parameterization Parameters * This header may be parameterized by a set of preprocessor macros. - * CL_HPP_TARGET_OPENCL_VERSION - * - Defines the target OpenCL runtime version to build the header against. - * Defaults to 200, representing OpenCL 2.0. - * CL_HPP_NO_STD_STRING - * - Do not use the standard library string class. - * cl::string is not defined and may be defined by the user before - * cl2.hpp is included. - * CL_HPP_NO_STD_VECTOR - * - Do not use the standard library vector class. 
- * cl::vector is not defined and may be defined by the user before - * cl2.hpp is included. - * CL_HPP_NO_STD_ARRAY - * - Do not use the standard library array class. - * cl::array is not defined and may be defined by the user before - * cl2.hpp is included. - * CL_HPP_NO_STD_UNIQUE_PTR - * - Do not use the standard library unique_ptr class. - * cl::pointer and the cl::allocate_pointer function are not defined - * and may be defined by the user before cl2.hpp is included. - * CL_HPP_ENABLE_DEVICE_FISSION - * - Enables device fission for OpenCL 1.2 platforms - * CL_HPP_ENABLE_EXCEPTIONS - * - Enable exceptions for use in the C++ bindings header. - * This is the preferred error handling mechanism but is not required. - * CL_HPP_ENABLE_SIZE_T_COMPATIBILITY - * - Backward compatibility option to support cl.hpp-style size_t class. - * Replaces the updated std::array derived version and removal of size_t - * from the namespace. Note that in this case the new size_t class - * is placed in the cl::compatibility namespace and thus requires - * an additional using declaration for direct backward compatibility. - * CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY - * - Enable older vector of pairs interface for construction of programs. - * CL_HPP_CL_1_2_DEFAULT_BUILD - * - Default to OpenCL C 1.2 compilation rather than OpenCL C 2.0 - * - applies to use of cl::Program construction and other program build variants. + * + * - CL_HPP_TARGET_OPENCL_VERSION + * + * Defines the target OpenCL runtime version to build the header + * against. Defaults to 200, representing OpenCL 2.0. + * + * - CL_HPP_NO_STD_STRING + * + * Do not use the standard library string class. cl::string is not + * defined and may be defined by the user before cl2.hpp is + * included. + * + * - CL_HPP_NO_STD_VECTOR + * + * Do not use the standard library vector class. cl::vector is not + * defined and may be defined by the user before cl2.hpp is + * included. 
+ * + * - CL_HPP_NO_STD_ARRAY + * + * Do not use the standard library array class. cl::array is not + * defined and may be defined by the user before cl2.hpp is + * included. + * + * - CL_HPP_NO_STD_UNIQUE_PTR + * + * Do not use the standard library unique_ptr class. cl::pointer and + * the cl::allocate_pointer functions are not defined and may be + * defined by the user before cl2.hpp is included. + * + * - CL_HPP_ENABLE_DEVICE_FISSION + * + * Enables device fission for OpenCL 1.2 platforms. + * + * - CL_HPP_ENABLE_EXCEPTIONS + * + * Enable exceptions for use in the C++ bindings header. This is the + * preferred error handling mechanism but is not required. + * + * - CL_HPP_ENABLE_SIZE_T_COMPATIBILITY + * + * Backward compatibility option to support cl.hpp-style size_t + * class. Replaces the updated std::array derived version and + * removal of size_t from the namespace. Note that in this case the + * new size_t class is placed in the cl::compatibility namespace and + * thus requires an additional using declaration for direct backward + * compatibility. + * + * - CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY + * + * Enable older vector of pairs interface for construction of + * programs. + * + * - CL_HPP_CL_1_2_DEFAULT_BUILD + * + * Default to OpenCL C 1.2 compilation rather than OpenCL C 2.0. + * Applies to use of cl::Program construction and other program + * build variants. * * * \section example Example @@ -177,19 +212,19 @@ * bindings, including support for the optional exception feature and * also the supplied vector and string classes, see following sections for * decriptions of these features. 
- * + * * \code #define CL_HPP_ENABLE_EXCEPTIONS #define CL_HPP_TARGET_OPENCL_VERSION 200 - + #include #include #include #include #include - + const int numElements = 32; - + int main(void) { // Filter for a 2.0 platform and set it as the default @@ -212,35 +247,45 @@ std::cout << "Error setting default platform."; return -1; } - - std::string kernel1{ - "global int globalA;" - "kernel void updateGlobal(){" - " globalA = 75;" - "}"}; - std::string kernel2{ - "typedef struct { global int *bar; } Foo; kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB, global int *output, int val, write_only pipe int outPipe, queue_t childQueue){" - " output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar);" - " write_pipe(outPipe, &val);" - " queue_t default_queue = get_default_queue(); " - " ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2); " - // Have a child kernel write into third quarter of output - " enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, " - " ^{" - " output[get_global_size(0)*2 + get_global_id(0)] = inputA[get_global_size(0)*2+get_global_id(0)] + inputB[get_global_size(0)*2+get_global_id(0)] + globalA;" - " });" - // Have a child kernel write into last quarter of output - " enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, " - " ^{" - " output[get_global_size(0)*3 + get_global_id(0)] = inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2;" - " });" - "}" }; + + // Use C++11 raw string literals for kernel source code + std::string kernel1{R"CLC( + global int globalA; + kernel void updateGlobal() + { + globalA = 75; + } + )CLC"}; + std::string kernel2{R"CLC( + typedef struct { global int *bar; } Foo; + kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB, + global int *output, int val, write_only pipe int outPipe, 
queue_t childQueue) + { + output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar); + write_pipe(outPipe, &val); + queue_t default_queue = get_default_queue(); + ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2); + + // Have a child kernel write into third quarter of output + enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, + ^{ + output[get_global_size(0)*2 + get_global_id(0)] = + inputA[get_global_size(0)*2 + get_global_id(0)] + inputB[get_global_size(0)*2 + get_global_id(0)] + globalA; + }); + + // Have a child kernel write into last quarter of output + enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, + ^{ + output[get_global_size(0)*3 + get_global_id(0)] = + inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2; + }); + } + )CLC"}; // New simpler string interface style std::vector programStrings {kernel1, kernel2}; - cl::Program vectorAddProgram( - programStrings); + cl::Program vectorAddProgram(programStrings); try { vectorAddProgram.build("-cl-std=CL2.0"); } @@ -251,7 +296,7 @@ for (auto &pair : buildInfo) { std::cerr << pair.second << std::endl << std::endl; } - + return 1; } @@ -264,17 +309,17 @@ program2Kernel( cl::EnqueueArgs( cl::NDRange(1))); - + ////////////////// // SVM allocations - - cl::pointer anSVMInt = cl::allocate_svm>(); + + auto anSVMInt = cl::allocate_svm>(); *anSVMInt = 5; - cl::SVMAllocator>> svmAllocReadOnly; + cl::SVMAllocator>> svmAllocReadOnly; auto fooPointer = cl::allocate_pointer(svmAllocReadOnly); fooPointer->bar = anSVMInt.get(); cl::SVMAllocator> svmAlloc; - std::vector>> inputA(numElements, 1, svmAlloc); + std::vector>> inputA(numElements, 1, svmAlloc); cl::coarse_svm_vector inputB(numElements, 2, svmAlloc); // @@ -284,7 +329,7 @@ std::vector output(numElements, 0xdeadbeef); cl::Buffer outputBuffer(begin(output), end(output), false); cl::Pipe aPipe(sizeof(cl_int), 
numElements / 2); - + // Default command queue, also passed in as a parameter cl::DeviceCommandQueue defaultDeviceQueue = cl::DeviceCommandQueue::makeDefault( cl::Context::getDefault(), cl::Device::getDefault()); @@ -339,7 +384,7 @@ return 0; } - * + * * \endcode * */ @@ -3538,7 +3583,7 @@ template cl::pointer> allocate_pointer(const Alloc &alloc_, Args&&... args) { Alloc alloc(alloc_); - static const size_t copies = 1; + static const size_type copies = 1; // Ensure that creation of the management block and the // object are dealt with separately such that we only provide a deleter @@ -6520,7 +6565,7 @@ inline cl_int cl::Program::getInfo(cl_program_info name, vectorresize(numBinaries); - for (int i = 0; i < numBinaries; ++i) { + for (size_type i = 0; i < numBinaries; ++i) { (*param)[i].resize(sizes[i]); } @@ -7107,7 +7152,7 @@ public: size_type buffer_slice_pitch, size_type host_row_pitch, size_type host_slice_pitch, - void *ptr, + const void *ptr, const vector* events = NULL, Event* event = NULL) const { @@ -7255,7 +7300,7 @@ public: const array& region, size_type row_pitch, size_type slice_pitch, - void* ptr, + const void* ptr, const vector* events = NULL, Event* event = NULL) const { @@ -7845,7 +7890,7 @@ public: CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int enqueueTask( const Kernel& kernel, const vector* events = NULL, - Event* event = NULL) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED const + Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED { cl_event tmp; cl_int err = detail::errHandler( @@ -8873,7 +8918,7 @@ inline cl_int enqueueWriteBufferRect( size_type buffer_slice_pitch, size_type host_row_pitch, size_type host_slice_pitch, - void *ptr, + const void *ptr, const vector* events = NULL, Event* event = NULL) { @@ -8971,7 +9016,7 @@ inline cl_int enqueueWriteImage( const array& region, size_type row_pitch, size_type slice_pitch, - void* ptr, + const void* ptr, const vector* events = NULL, Event* event = NULL) {