diff --git a/CL/cl2.hpp b/CL/cl2.hpp index 07b1c99..ad0c7c4 100644 --- a/CL/cl2.hpp +++ b/CL/cl2.hpp @@ -12,6 +12,11 @@ * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. @@ -34,16 +39,19 @@ * Matt Gruenke, April 2012. * Bruce Merry, February 2013. * Tom Deakin and Simon McIntosh-Smith, July 2013 - * - * \version 2.0.5 - * \date 2015-08-10 + * James Price, June-November 2015 + * + * \version 2.0.9 + * \date 2015-12-07 * * Optional extension support * * cl_ext_device_fission * #define CL_HPP_USE_CL_DEVICE_FISSION * cl_khr_d3d10_sharing - * #define USE_DX_INTEROP + * #define CL_HPP_USE_DX_INTEROP + * cl_khr_sub_groups + * #define CL_HPP_USE_CL_SUB_GROUPS_KHR */ /*! \mainpage @@ -54,7 +62,7 @@ * The interface is contained with a single C++ header file \em cl2.hpp and all * definitions are contained within the namespace \em cl. There is no additional * requirement to include \em cl.h and to use either the C++ or original C - * bindings; it is enough to simply include \em cl.hpp. + * bindings; it is enough to simply include \em cl2.hpp. * * The bindings themselves are lightweight and correspond closely to the * underlying C API. Using the C++ bindings introduces no additional execution @@ -96,7 +104,7 @@ * by default. * In all cases these standard library classes can be replaced with * custom interface-compatible versions using the CL_HPP_NO_STD_ARRAY, - * CL_HPP_NO_STD_VECTOR, CL_HPP_NO_STD_SHARED_PTR and + * CL_HPP_NO_STD_VECTOR, CL_HPP_NO_STD_UNIQUE_PTR and * CL_HPP_NO_STD_STRING macros. * * The OpenCL 1.x versions of the C++ bindings included a size_t wrapper @@ -141,8 +149,8 @@ * - Do not use the standard library array class. * cl::array is not defined and may be defined by the user before * cl2.hpp is included. - * CL_HPP_NO_STD_SHARED_PTR - * - Do not use the standard library shared_ptr class. + * CL_HPP_NO_STD_UNIQUE_PTR + * - Do not use the standard library unique_ptr class. * cl::pointer and the cl::allocate_pointer function are not defined * and may be defined by the user before cl2.hpp is included. 
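For reference, the CL_HPP_* options documented in this hunk are consumed at preprocessing time, so they must appear before the first include of cl2.hpp. Below is a minimal sketch of a translation unit configured for OpenCL 2.0; the particular macro choices and the device-name query are illustrative only, not part of the patch.

```cpp
// Illustrative configuration only: CL_HPP_* options must be defined
// before cl2.hpp is first included.
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_MINIMUM_OPENCL_VERSION 200
#define CL_HPP_TARGET_OPENCL_VERSION 200
// Optional extension support, e.g. sub-groups:
// #define CL_HPP_USE_CL_SUB_GROUPS_KHR
#include <CL/cl2.hpp>

#include <iostream>

int main() {
    // Quick smoke test of the configuration: query the default device.
    cl::Device device = cl::Device::getDefault();
    std::cout << device.getInfo<CL_DEVICE_NAME>() << std::endl;
    return 0;
}
```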
* CL_HPP_ENABLE_DEVICE_FISSION @@ -263,7 +271,7 @@ cl::pointer anSVMInt = cl::allocate_svm>(); *anSVMInt = 5; cl::SVMAllocator>> svmAllocReadOnly; - cl::pointer fooPointer = cl::allocate_pointer(svmAllocReadOnly); + auto fooPointer = cl::allocate_pointer(svmAllocReadOnly); fooPointer->bar = anSVMInt.get(); cl::SVMAllocator> svmAlloc; std::vector>> inputA(numElements, 1, svmAlloc); @@ -280,10 +288,10 @@ // Default command queue, also passed in as a parameter cl::DeviceCommandQueue defaultDeviceQueue = cl::DeviceCommandQueue::makeDefault( cl::Context::getDefault(), cl::Device::getDefault()); - + auto vectorAddKernel = cl::KernelFunctor< - cl::pointer, + decltype(fooPointer)&, int*, cl::coarse_svm_vector&, cl::Buffer, @@ -296,6 +304,12 @@ // This one was not passed as a parameter vectorAddKernel.setSVMPointers(anSVMInt); + // Hand control of coarse allocations to runtime + cl::enqueueUnmapSVM(anSVMInt); + cl::enqueueUnmapSVM(fooPointer); + cl::unmapSVM(inputB); + cl::unmapSVM(output2); + cl_int error; vectorAddKernel( cl::EnqueueArgs( @@ -312,6 +326,8 @@ ); cl::copy(outputBuffer, begin(output), end(output)); + // Grab the SVM output vector using a map + cl::mapSVM(output2); cl::Device d = cl::Device::getDefault(); @@ -516,22 +532,13 @@ namespace cl { #if CL_HPP_TARGET_OPENCL_VERSION >= 200 -#if !defined(CL_HPP_NO_STD_SHARED_PTR) +#if !defined(CL_HPP_NO_STD_UNIQUE_PTR) #include namespace cl { - // Replace shared_ptr and allocate_ptr for internal use + // Replace unique_ptr and allocate_pointer for internal use // to allow user to replace them - template - using pointer = std::shared_ptr; - - template - auto allocate_pointer(const Alloc &alloc, Args&&... args) -> - decltype(std::allocate_shared( - alloc, std::forward(args)...)) - { - return std::allocate_shared( - alloc, std::forward(args)...); - } + template + using pointer = std::unique_ptr; } // namespace cl #endif #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 @@ -540,7 +547,7 @@ namespace cl { namespace cl { template < class T, size_type N > using array = std::array; -} +} // namespace cl #endif // #if !defined(CL_HPP_NO_STD_ARRAY) // Define size_type appropriately to allow backward-compatibility @@ -608,11 +615,11 @@ namespace cl { #endif // #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) // Helper alias to avoid confusing the macros -namespace cl{ +namespace cl { namespace detail { using size_t_array = array; - } -} + } // namespace detail +} // namespace cl /*! \namespace cl @@ -809,6 +816,7 @@ static inline cl_int errHandler (cl_int err, const char * errStr = NULL) #define __ENQUEUE_ACQUIRE_GL_ERR CL_HPP_ERR_STR_(clEnqueueAcquireGLObjects) #define __ENQUEUE_RELEASE_GL_ERR CL_HPP_ERR_STR_(clEnqueueReleaseGLObjects) +#define __CREATE_PIPE_ERR CL_HPP_ERR_STR_(clCreatePipe) #define __GET_PIPE_INFO_ERR CL_HPP_ERR_STR_(clGetPipeInfo) @@ -850,9 +858,6 @@ static inline cl_int errHandler (cl_int err, const char * errStr = NULL) #define __CREATE_SAMPLER_ERR CL_HPP_ERR_STR_(clCreateSampler) #endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -#endif // CL_HPP_USER_OVERRIDE_ERROR_STRINGS -//! \endcond - /** * CL 1.2 marker and barrier commands */ @@ -861,6 +866,9 @@ static inline cl_int errHandler (cl_int err, const char * errStr = NULL) #define __ENQUEUE_BARRIER_WAIT_LIST_ERR CL_HPP_ERR_STR_(clEnqueueBarrierWithWaitList) #endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#endif // CL_HPP_USER_OVERRIDE_ERROR_STRINGS +//! 
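A condensed, compilable sketch of the pattern the revised \mainpage example documents: a fine-grained SVM int reached only through another SVM allocation, passed to a KernelFunctor whose parameter type is the deduced cl::pointer reference, with the indirect allocation declared via setSVMPointers. The kernel source and names are placeholders, and fine-grained buffer SVM support is assumed.

```cpp
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include <CL/cl2.hpp>

// Host mirror of the kernel-side struct; the layout must match the device.
struct Foo { int *bar; };

int main() {
    // A fine-grained SVM int that the kernel reaches only through Foo::bar.
    auto anSVMInt = cl::allocate_svm<int, cl::SVMTraitFine<>>(41);

    cl::SVMAllocator<Foo, cl::SVMTraitFine<>> fooAlloc;
    auto fooPointer = cl::allocate_pointer<Foo>(fooAlloc);
    fooPointer->bar = anSVMInt.get();

    // Placeholder kernel, built for OpenCL C 2.0 as in the example above.
    cl::Program program(
        "typedef struct { global int *bar; } Foo;"
        "kernel void touch(global Foo *foo) { *(foo->bar) += 1; }");
    program.build("-cl-std=CL2.0");

    // The functor's first parameter is the deduced cl::pointer type,
    // passed by reference, exactly as in the revised example.
    auto touch = cl::KernelFunctor<decltype(fooPointer)&>(program, "touch");

    // Indirectly referenced SVM allocations still have to be declared.
    touch.setSVMPointers(anSVMInt);

    touch(cl::EnqueueArgs(cl::NDRange(1)), fooPointer);
    cl::CommandQueue::getDefault().finish();

    return *anSVMInt == 42 ? 0 : 1;
}
```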
\endcond + namespace detail { @@ -978,13 +986,18 @@ inline cl_int getInfoHelper(Func f, cl_uint name, string* param, long) // std::string has a constant data member // a char vector does not - vector value(required); - err = f(name, required, value.data(), NULL); - if (err != CL_SUCCESS) { - return err; + if (required > 0) { + vector value(required); + err = f(name, required, value.data(), NULL); + if (err != CL_SUCCESS) { + return err; + } + if (param) { + param->assign(begin(value), prev(end(value))); + } } - if (param) { - param->assign(begin(value), prev(end(value))); + else if (param) { + param->assign(""); } return CL_SUCCESS; } @@ -1132,15 +1145,12 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_ F(cl_image_info, CL_IMAGE_WIDTH, size_type) \ F(cl_image_info, CL_IMAGE_HEIGHT, size_type) \ F(cl_image_info, CL_IMAGE_DEPTH, size_type) \ - F(cl_image_info, CL_IMAGE_ARRAY_SIZE, size_type) \ - F(cl_image_info, CL_IMAGE_NUM_MIP_LEVELS, cl_uint) \ - F(cl_image_info, CL_IMAGE_NUM_SAMPLES, cl_uint) \ \ F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ - F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \ - F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \ - F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \ + F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_bool) \ + F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_addressing_mode) \ + F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_filter_mode) \ \ F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ @@ -1204,14 +1214,19 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_ F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \ F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, string) \ F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, string) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_QUALIFIER, cl_kernel_arg_type_qualifier) \ \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl_device_id) \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl::Device) \ F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, cl::vector) \ F(cl_device_info, CL_DEVICE_PARTITION_TYPE, cl::vector) \ F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \ F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, size_type) \ F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ - F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, string) + F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, string) \ + \ + F(cl_image_info, CL_IMAGE_ARRAY_SIZE, size_type) \ + F(cl_image_info, CL_IMAGE_NUM_MIP_LEVELS, cl_uint) \ + F(cl_image_info, CL_IMAGE_NUM_SAMPLES, cl_uint) #define CL_HPP_PARAM_NAME_INFO_2_0_(F) \ F(cl_device_info, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, cl_command_queue_properties) \ @@ -1224,6 +1239,12 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_ F(cl_device_info, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, cl_uint) \ F(cl_device_info, CL_DEVICE_PIPE_MAX_PACKET_SIZE, cl_uint) \ F(cl_device_info, CL_DEVICE_SVM_CAPABILITIES, cl_device_svm_capabilities) \ + F(cl_device_info, CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, cl_uint) \ + F(cl_command_queue_info, CL_QUEUE_SIZE, cl_uint) \ + F(cl_mem_info, CL_MEM_USES_SVM_POINTER, cl_bool) 
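The corrected param_traits mappings in this hunk change the types returned by getInfo<>. A small sketch of queries that rely on them; it assumes the default device is valid and supports samplers, and the specific queries are illustrative.

```cpp
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include <CL/cl2.hpp>

#include <iostream>

int main() {
    cl::Device device = cl::Device::getDefault();

    // May legitimately be empty; the zero-size path in getInfoHelper now
    // yields "" instead of touching an empty buffer.
    cl::string builtins = device.getInfo<CL_DEVICE_BUILT_IN_KERNELS>();

    // CL_DEVICE_PARENT_DEVICE now maps to cl::Device (null for a root device).
    cl::Device parent = device.getInfo<CL_DEVICE_PARENT_DEVICE>();

    // Sampler queries use the corrected result types.
    cl::Sampler sampler(cl::Context::getDefault(), CL_FALSE,
                        CL_ADDRESS_CLAMP, CL_FILTER_NEAREST);
    cl_bool normalized = sampler.getInfo<CL_SAMPLER_NORMALIZED_COORDS>();
    cl_addressing_mode addressing = sampler.getInfo<CL_SAMPLER_ADDRESSING_MODE>();
    cl_filter_mode filter = sampler.getInfo<CL_SAMPLER_FILTER_MODE>();

    std::cout << "built-in kernels: \"" << builtins << "\"\n"
              << "is sub-device: " << (parent() != nullptr) << "\n"
              << normalized << " " << addressing << " " << filter << std::endl;
    return 0;
}
```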
\ + F(cl_program_build_info, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, size_type) \ F(cl_pipe_info, CL_PIPE_PACKET_SIZE, cl_uint) \ F(cl_pipe_info, CL_PIPE_MAX_PACKETS, cl_uint) @@ -1633,6 +1654,11 @@ public: cl_type& operator ()() { return object_; } + const cl_type get() const { return object_; } + + cl_type get() { return object_; } + + protected: template friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); @@ -1757,6 +1783,10 @@ public: cl_type& operator ()() { return object_; } + const cl_type get() const { return object_; } + + cl_type get() { return object_; } + protected: template friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); @@ -3289,7 +3319,27 @@ public: } }; +// Pre-declare SVM map function +template +inline cl_int enqueueMapSVM( + T* ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events = NULL, + Event* event = NULL); +/** + * STL-like allocator class for managing SVM objects provided for convenience. + * + * Note that while this behaves like an allocator for the purposes of constructing vectors and similar objects, + * care must be taken when using with smart pointers. + * The allocator should not be used to construct a unique_ptr if we are using coarse-grained SVM mode because + * the coarse-grained management behaviour would behave incorrectly with respect to reference counting. + * + * Instead the allocator embeds a Deleter which may be used with unique_ptr and is used + * with the allocate_shared and allocate_ptr supplied operations. + */ template class SVMAllocator { private: @@ -3313,7 +3363,7 @@ public: template friend class SVMAllocator; - explicit SVMAllocator() : + SVMAllocator() : context_(Context::getDefault()) { } @@ -3322,7 +3372,8 @@ public: context_(context) { } - + + SVMAllocator(const SVMAllocator &other) : context_(other.context_) { @@ -3351,17 +3402,20 @@ public: /** * Allocate an SVM pointer. * + * If the allocator is coarse-grained, this will take ownership to allow + * containers to correctly construct data in place. 
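The get() accessors added to detail::Wrapper here expose the underlying handle alongside operator()(), which is convenient when mixing wrapper objects with raw C entry points. A trivial sketch, assuming only a valid default context:

```cpp
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include <CL/cl2.hpp>

#include <iostream>

int main() {
    cl::Context context = cl::Context::getDefault();

    // get() returns the underlying cl_context, equivalent to context(),
    // so the wrapped handle can be handed straight to a C API call.
    cl_uint refCount = 0;
    clGetContextInfo(context.get(), CL_CONTEXT_REFERENCE_COUNT,
                     sizeof(refCount), &refCount, nullptr);

    std::cout << "context reference count: " << refCount << std::endl;
    return 0;
}
```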
*/ pointer allocate( size_type size, typename cl::SVMAllocator::const_pointer = 0) { + // Allocate memory with default alignment matching the size of the type void* voidPointer = clSVMAlloc( context_(), SVMTrait::getSVMMemFlags(), size*sizeof(T), - 0); + sizeof(T)); pointer retValue = reinterpret_cast( voidPointer); #if defined(CL_HPP_ENABLE_EXCEPTIONS) @@ -3371,6 +3425,15 @@ public: } #endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS) + // If allocation was coarse-grained then map it + if (!(SVMTrait::getSVMMemFlags() & CL_MEM_SVM_FINE_GRAIN_BUFFER)) { + cl_int err = enqueueMapSVM(retValue, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, size*sizeof(T)); + if (err != CL_SUCCESS) { + std::bad_alloc excep; + throw excep; + } + } + // If exceptions disabled, return null pointer from allocator return retValue; } @@ -3421,7 +3484,7 @@ public: { return !operator==(a); } -}; // class SVMAllocator +}; // class SVMAllocator return cl::pointer(tmp, detail::Deleter{alloc, copies}); template @@ -3441,19 +3504,79 @@ public: friend class SVMAllocator; }; +#if !defined(CL_HPP_NO_STD_UNIQUE_PTR) +namespace detail +{ + template + class Deleter { + private: + Alloc alloc_; + size_type copies_; + + public: + typedef typename std::allocator_traits::pointer pointer; + + Deleter(const Alloc &alloc, size_type copies) : alloc_{ alloc }, copies_{ copies } + { + } + + void operator()(pointer ptr) const { + Alloc tmpAlloc{ alloc_ }; + std::allocator_traits::destroy(tmpAlloc, std::addressof(*ptr)); + std::allocator_traits::deallocate(tmpAlloc, ptr, copies_); + } + }; +} // namespace detail + +/** + * Allocation operation compatible with std::allocate_ptr. + * Creates a unique_ptr by default. + * This requirement is to ensure that the control block is not + * allocated in memory inaccessible to the host. + */ +template +cl::pointer> allocate_pointer(const Alloc &alloc_, Args&&... args) +{ + Alloc alloc(alloc_); + static const size_t copies = 1; + + // Ensure that creation of the management block and the + // object are dealt with separately such that we only provide a deleter + + T* tmp = std::allocator_traits::allocate(alloc, copies); + if (!tmp) { + std::bad_alloc excep; + throw excep; + } + try { + std::allocator_traits::construct( + alloc, + std::addressof(*tmp), + std::forward(args)...); + + return cl::pointer>(tmp, detail::Deleter{alloc, copies}); + } + catch (std::bad_alloc b) + { + std::allocator_traits::deallocate(alloc, tmp, copies); + throw; + } +} + template< class T, class SVMTrait, class... Args > -cl::pointer allocate_svm(Args... args) +cl::pointer>> allocate_svm(Args... args) { SVMAllocator alloc; return cl::allocate_pointer(alloc, args...); } template< class T, class SVMTrait, class... Args > -cl::pointer allocate_svm(const cl::Context &c, Args... args) +cl::pointer>> allocate_svm(const cl::Context &c, Args... args) { SVMAllocator alloc(c); return cl::allocate_pointer(alloc, args...); } +#endif // #if !defined(CL_HPP_NO_STD_UNIQUE_PTR) /*! \brief Vector alias to simplify contruction of coarse-grained SVM containers. 
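Taken together, the Deleter, allocate_pointer and allocate_svm pieces in this hunk yield unique_ptr-based SVM ownership. A small usage sketch, assuming a device that reports the corresponding SVM capabilities (fine-grained for the first allocation, coarse-grained for the second):

```cpp
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include <CL/cl2.hpp>

int main() {
    // Fine-grained SVM int wrapped in a cl::pointer (a std::unique_ptr whose
    // cl::detail::Deleter destroys the object and frees the SVM block).
    auto fineValue = cl::allocate_svm<int, cl::SVMTraitFine<>>(42);

    // The same thing through an explicit allocator and allocate_pointer;
    // coarse-grained allocations come back mapped, so the host write is valid.
    cl::SVMAllocator<int, cl::SVMTraitCoarse<>> coarseAlloc;
    auto coarseValue = cl::allocate_pointer<int>(coarseAlloc, 7);

    return (*fineValue == 42 && *coarseValue == 7) ? 0 : 1;
}
```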
* @@ -5116,7 +5239,7 @@ public: cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; object_ = ::clCreatePipe(context(), flags, packet_size, max_packets, nullptr, &error); - detail::errHandler(error, __CREATE_BUFFER_ERR); + detail::errHandler(error, __CREATE_PIPE_ERR); if (err != NULL) { *err = error; } @@ -5142,7 +5265,7 @@ public: cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; object_ = ::clCreatePipe(context(), flags, packet_size, max_packets, nullptr, &error); - detail::errHandler(error, __CREATE_BUFFER_ERR); + detail::errHandler(error, __CREATE_PIPE_ERR); if (err != NULL) { *err = error; } @@ -5205,7 +5328,7 @@ public: { return detail::errHandler( detail::getInfo(&::clGetPipeInfo, object_, name, param), - __GET_MEM_OBJECT_INFO_ERR); + __GET_PIPE_INFO_ERR); } //! \brief Wrapper for clGetMemObjectInfo() that returns by value. @@ -5653,8 +5776,8 @@ public: #if CL_HPP_TARGET_OPENCL_VERSION >= 200 /*! \brief setArg overload taking a shared_ptr type */ - template - cl_int setArg(cl_uint index, const cl::pointer argPtr) + template + cl_int setArg(cl_uint index, const cl::pointer &argPtr) { return detail::errHandler( ::clSetKernelArgSVMPointer(object_, index, argPtr.get()), @@ -5670,11 +5793,9 @@ public: ::clSetKernelArgSVMPointer(object_, index, argPtr.data()), __SET_KERNEL_ARGS_ERR); } -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /*! \brief setArg overload taking a pointer type - */ + */ template typename std::enable_if::value, cl_int>::type setArg(cl_uint index, const T argPtr) @@ -5761,8 +5882,8 @@ public: ); } - template - void setSVMPointersHelper(std::array &pointerList, pointer &t0, Ts... ts) + template + void setSVMPointersHelper(std::array &pointerList, const pointer &t0, Ts... ts) { pointerList[index] = static_cast(t0.get()); setSVMPointersHelper(ts...); @@ -5776,8 +5897,8 @@ public: setSVMPointersHelper(ts...); } - template - void setSVMPointersHelper(std::array &pointerList, pointer &t0) + template + void setSVMPointersHelper(std::array &pointerList, const pointer &t0) { pointerList[index] = static_cast(t0.get()); } @@ -5790,7 +5911,7 @@ public: } template - cl_int setSVMPointers(T0 t0, Ts... ts) + cl_int setSVMPointers(const T0 &t0, Ts... ts) { std::array pointerList; @@ -6436,6 +6557,18 @@ inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) } +enum class QueueProperties : cl_command_queue_properties +{ + None = 0, + Profiling = CL_QUEUE_PROFILING_ENABLE, + OutOfOrder = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, +}; + +inline QueueProperties operator|(QueueProperties lhs, QueueProperties rhs) +{ + return static_cast(static_cast(lhs) | static_cast(rhs)); +} + /*! \class CommandQueue * \brief CommandQueue interface for cl_command_queue. */ @@ -6467,7 +6600,7 @@ private: default_error_ = error; } else { - Device device = context.getInfo()[0]; + Device device = Device::getDefault(); default_ = CommandQueue(context, device, 0, &default_error_); } } @@ -6499,6 +6632,7 @@ public: default_ = CommandQueue(); } #endif // #ifdef CL_HPP_UNIT_TEST_ENABLE + /*! * \brief Constructs a CommandQueue based on passed properties. @@ -6548,6 +6682,51 @@ public: } } + /*! + * \brief Constructs a CommandQueue based on passed properties. + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. 
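The const-reference setArg overloads touched here let cl::pointer instances and SVM containers be passed to clSetKernelArgSVMPointer without an explicit .get() or .data(). A rough sketch with a placeholder kernel; no launch is shown, and coarse allocations would still need to be unmapped before device use.

```cpp
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include <CL/cl2.hpp>

int main() {
    cl::Program program(
        "kernel void copy(global const int *src, global int *dst) {"
        "  dst[get_global_id(0)] = src[get_global_id(0)];"
        "}");
    program.build("-cl-std=CL2.0");
    cl::Kernel kernel(program, "copy");

    cl::SVMAllocator<int, cl::SVMTraitCoarse<>> svmAlloc;
    cl::coarse_svm_vector<int> src(16, 3, svmAlloc);
    auto dst = cl::allocate_svm<int, cl::SVMTraitCoarse<>>();

    // Both overloads end up in clSetKernelArgSVMPointer; the container and
    // the smart pointer are accepted directly.
    kernel.setArg(0, src);
    kernel.setArg(1, dst);
    return 0;
}
```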
+ */ + CommandQueue( + QueueProperties properties, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo()[0]; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; + + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } +#else + object_ = ::clCreateCommandQueue( + context(), device(), static_cast(properties), &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } +#endif + } + } + /*! * \brief Constructs a CommandQueue for an implementation defined device in the given context * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. @@ -6598,6 +6777,51 @@ public: } + /*! + * \brief Constructs a CommandQueue for an implementation defined device in the given context + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. + */ + explicit CommandQueue( + const Context& context, + QueueProperties properties, + cl_int* err = NULL) + { + cl_int error; + vector devices; + error = context.getInfo(CL_CONTEXT_DEVICES, &devices); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) + { + if (err != NULL) { + *err = error; + } + return; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), devices[0](), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } +#else + object_ = ::clCreateCommandQueue( + context(), devices[0](), static_cast(properties), &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } +#endif + + } + /*! * \brief Constructs a CommandQueue for a passed device and context * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. @@ -6613,14 +6837,9 @@ public: #if CL_HPP_TARGET_OPENCL_VERSION >= 200 cl_queue_properties queue_properties[] = { CL_QUEUE_PROPERTIES, properties, 0 }; - if ((properties & CL_QUEUE_ON_DEVICE) == 0) { - object_ = ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error); - } - else { - error = CL_INVALID_QUEUE_PROPERTIES; - } - + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); if (err != NULL) { *err = error; @@ -6636,6 +6855,39 @@ public: #endif } + /*! + * \brief Constructs a CommandQueue for a passed device and context + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. 
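A short sketch of the new QueueProperties constructors and the overloaded operator|. Out-of-order host queues remain optional, so the first combination assumes the device accepts it.

```cpp
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include <CL/cl2.hpp>

int main() {
    // Default-context queue with profiling; flags combine via operator|.
    // (Out-of-order host execution is an optional device capability.)
    cl::CommandQueue queue(
        cl::QueueProperties::Profiling | cl::QueueProperties::OutOfOrder);

    // The same enum with an explicit context (uses the context's first device).
    cl::Context context = cl::Context::getDefault();
    cl::CommandQueue contextQueue(context, cl::QueueProperties::None);

    queue.finish();
    contextQueue.finish();
    return 0;
}
```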
+ */ + CommandQueue( + const Context& context, + const Device& device, + QueueProperties properties, + cl_int* err = NULL) + { + cl_int error; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } +#else + object_ = ::clCreateCommandQueue( + context(), device(), static_cast(properties), &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } +#endif + } + static CommandQueue getDefault(cl_int * err = NULL) { std::call_once(default_initialized_, makeDefault); @@ -6654,7 +6906,7 @@ public: * Modify the default command queue to be used by * subsequent operations. * Will only set the default if no default was previously created. - * @return updated default platform. + * @return updated default command queue. * Should be compared to the passed value to ensure that it was updated. */ static CommandQueue setDefault(const CommandQueue &default_queue) @@ -7282,6 +7534,89 @@ public: return result; } +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /** + * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. + * This variant takes a raw SVM pointer. + */ + template + cl_int enqueueMapSVM( + T* ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler(::clEnqueueSVMMap( + object_, blocking, flags, static_cast(ptr), size, + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MAP_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + + /** + * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. + * This variant takes a cl::pointer instance. + */ + template + cl_int enqueueMapSVM( + cl::pointer &ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler(::clEnqueueSVMMap( + object_, blocking, flags, static_cast(ptr.get()), size, + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MAP_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. + * This variant takes a cl::vector instance. + */ + template + cl_int enqueueMapSVM( + cl::vector &container, + cl_bool blocking, + cl_map_flags flags, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler(::clEnqueueSVMMap( + object_, blocking, flags, static_cast(container.data()), container.size(), + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? 
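The (context, device, QueueProperties) constructor and setDefault compose as below; the handle comparison is illustrative and only meaningful because no default queue has been created earlier in the program.

```cpp
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include <CL/cl2.hpp>

#include <iostream>

int main() {
    cl::Context context = cl::Context::getDefault();
    cl::Device device = cl::Device::getDefault();

    // Explicit context/device with the type-safe property enum.
    cl::CommandQueue queue(context, device, cl::QueueProperties::Profiling);

    // Install it as the default used by the convenience entry points.
    // This only takes effect if no default queue exists yet, so compare
    // the returned queue against the one passed in.
    cl::CommandQueue installed = cl::CommandQueue::setDefault(queue);
    std::cout << "default replaced: " << (installed() == queue()) << std::endl;
    return 0;
}
```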
&tmp : NULL), + __ENQUEUE_MAP_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + cl_int enqueueUnmapMemObject( const Memory& memory, void* mapped_ptr, @@ -7303,6 +7638,84 @@ public: return err; } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /** + * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. + * This variant takes a raw SVM pointer. + */ + template + cl_int enqueueUnmapSVM( + T* ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMUnmap( + object_, static_cast(ptr), + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. + * This variant takes a cl::pointer instance. + */ + template + cl_int enqueueUnmapSVM( + cl::pointer &ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMUnmap( + object_, static_cast(ptr.get()), + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. + * This variant takes a cl::vector instance. + */ + template + cl_int enqueueUnmapSVM( + cl::vector &container, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMUnmap( + object_, static_cast(container.data()), + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + #if CL_HPP_TARGET_OPENCL_VERSION >= 120 /** * Enqueues a marker command which waits for either a list of events to complete, @@ -7668,25 +8081,43 @@ CL_HPP_DEFINE_STATIC_MEMBER_ cl_int CommandQueue::default_error_ = CL_SUCCESS; #if CL_HPP_TARGET_OPENCL_VERSION >= 200 +enum class DeviceQueueProperties : cl_command_queue_properties +{ + None = 0, + Profiling = CL_QUEUE_PROFILING_ENABLE, +}; + +inline DeviceQueueProperties operator|(DeviceQueueProperties lhs, DeviceQueueProperties rhs) +{ + return static_cast(static_cast(lhs) | static_cast(rhs)); +} + /*! \class DeviceCommandQueue * \brief DeviceCommandQueue interface for device cl_command_queues. */ class DeviceCommandQueue : public detail::Wrapper { public: + /*! - * Default construct device command queue on default context and device - */ - DeviceCommandQueue(cl_int* err = NULL) + * Trivial empty constructor to create a null queue. + */ + DeviceCommandQueue() { } + + /*! 
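The raw-pointer member overloads added here pair naturally with SVMAllocator when containers and smart pointers are not wanted. A minimal sketch, assuming coarse-grained SVM support and the default context/queue:

```cpp
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include <CL/cl2.hpp>

#include <numeric>

int main() {
    cl::CommandQueue queue = cl::CommandQueue::getDefault();
    cl::SVMAllocator<int, cl::SVMTraitCoarse<>> svmAlloc;

    // allocate() returns a mapped coarse-grained block of 64 ints.
    int *raw = svmAlloc.allocate(64);
    std::iota(raw, raw + 64, 0);

    // Release it to the runtime before device use, then map it back with
    // the raw-pointer member overloads (blocking map).
    queue.enqueueUnmapSVM(raw);
    queue.enqueueMapSVM(raw, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
                        64 * sizeof(int));

    int last = raw[63];
    svmAlloc.deallocate(raw, 64);
    return last == 63 ? 0 : 1;
}
```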
+ * Default construct device command queue on default context and device + */ + DeviceCommandQueue(DeviceQueueProperties properties, cl_int* err = NULL) { cl_int error; cl::Context context = cl::Context::getDefault(); cl::Device device = cl::Device::getDefault(); - - cl_command_queue_properties properties = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE; + + cl_command_queue_properties mergedProperties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); + cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, properties, 0 }; + CL_QUEUE_PROPERTIES, mergedProperties, 0 }; object_ = ::clCreateCommandQueueWithProperties( context(), device(), queue_properties, &error); @@ -7702,14 +8133,15 @@ public: DeviceCommandQueue( const Context& context, const Device& device, + DeviceQueueProperties properties = DeviceQueueProperties::None, cl_int* err = NULL) { cl_int error; - cl_command_queue_properties properties = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE; + cl_command_queue_properties mergedProperties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, properties, 0 }; + CL_QUEUE_PROPERTIES, mergedProperties, 0 }; object_ = ::clCreateCommandQueueWithProperties( context(), device(), queue_properties, &error); @@ -7726,14 +8158,15 @@ public: const Context& context, const Device& device, cl_uint queueSize, + DeviceQueueProperties properties = DeviceQueueProperties::None, cl_int* err = NULL) { cl_int error; - cl_command_queue_properties properties = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE; + cl_command_queue_properties mergedProperties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, properties, + CL_QUEUE_PROPERTIES, mergedProperties, CL_QUEUE_SIZE, queueSize, 0 }; object_ = ::clCreateCommandQueueWithProperties( @@ -8080,6 +8513,81 @@ inline void* enqueueMapBuffer( return result; } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +/** + * Enqueues to the default queue a command that will allow the host to + * update a region of a coarse-grained SVM buffer. + * This variant takes a raw SVM pointer. + */ +template +inline cl_int enqueueMapSVM( + T* ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events, + Event* event) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + } + + return queue.enqueueMapSVM( + ptr, blocking, flags, size, events, event); +} + +/** + * Enqueues to the default queue a command that will allow the host to + * update a region of a coarse-grained SVM buffer. + * This variant takes a cl::pointer instance. + */ +template +inline cl_int enqueueMapSVM( + cl::pointer ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + } + + return queue.enqueueMapSVM( + ptr, blocking, flags, size, events, event); +} + +/** + * Enqueues to the default queue a command that will allow the host to + * update a region of a coarse-grained SVM buffer. + * This variant takes a cl::vector instance. 
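A small sketch of the revised DeviceCommandQueue constructors. CL_QUEUE_ON_DEVICE and out-of-order execution are always merged in, so DeviceQueueProperties only adds optional flags; on-device queues (OpenCL 2.0) are assumed, the queue size is an arbitrary placeholder, and requesting profiling explicitly is shown only as an example of the enum.

```cpp
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include <CL/cl2.hpp>

int main() {
    cl::Context context = cl::Context::getDefault();
    cl::Device device = cl::Device::getDefault();

    // A default-constructed DeviceCommandQueue is just a null handle.
    cl::DeviceCommandQueue deviceQueue;

    // The constructor merges in CL_QUEUE_ON_DEVICE and out-of-order
    // execution; the queue size here is a placeholder (the device's
    // CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE could be queried instead).
    deviceQueue = cl::DeviceCommandQueue(
        context, device, 16 * 1024, cl::DeviceQueueProperties::Profiling);
    return 0;
}
```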
+ */ +template +inline cl_int enqueueMapSVM( + cl::vector container, + cl_bool blocking, + cl_map_flags flags, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + } + + return queue.enqueueMapSVM( + container, blocking, flags, events, event); +} + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + inline cl_int enqueueUnmapMemObject( const Memory& memory, void* mapped_ptr, @@ -8096,10 +8604,10 @@ inline cl_int enqueueUnmapMemObject( cl_event tmp; cl_int err = detail::errHandler( ::clEnqueueUnmapMemObject( - queue(), memory(), mapped_ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), + queue(), memory(), mapped_ptr, + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), __ENQUEUE_UNMAP_MEM_OBJECT_ERR); if (event != NULL && err == CL_SUCCESS) @@ -8108,6 +8616,73 @@ inline cl_int enqueueUnmapMemObject( return err; } +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +/** + * Enqueues to the default queue a command that will release a coarse-grained + * SVM buffer back to the OpenCL runtime. + * This variant takes a raw SVM pointer. + */ +template +inline cl_int enqueueUnmapSVM( + T* ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + } + + return detail::errHandler(queue.enqueueUnmapSVM(ptr, events, event), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + +} + +/** + * Enqueues to the default queue a command that will release a coarse-grained + * SVM buffer back to the OpenCL runtime. + * This variant takes a cl::pointer instance. + */ +template +inline cl_int enqueueUnmapSVM( + cl::pointer &ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + } + + return detail::errHandler(queue.enqueueUnmapSVM(ptr, events, event), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); +} + +/** + * Enqueues to the default queue a command that will release a coarse-grained + * SVM buffer back to the OpenCL runtime. + * This variant takes a cl::vector instance. + */ +template +inline cl_int enqueueUnmapSVM( + cl::vector &container, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + } + + return detail::errHandler(queue.enqueueUnmapSVM(container, events, event), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); +} + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + inline cl_int enqueueCopyBuffer( const Buffer& src, const Buffer& dst, @@ -8229,6 +8804,28 @@ inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, Iterato return CL_SUCCESS; } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +/** + * Blocking SVM map operation - performs a blocking map underneath. 
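The free functions defined here route through CommandQueue::getDefault(), so simple host/runtime hand-offs need no explicit queue object. A sketch mixing the container unmap overload with the raw-pointer map overload; coarse-grained SVM and a usable default queue are assumed.

```cpp
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include <CL/cl2.hpp>

#include <numeric>

int main() {
    cl::SVMAllocator<int, cl::SVMTraitCoarse<>> svmAlloc;
    cl::coarse_svm_vector<int> data(256, 1, svmAlloc);

    // Hand the whole container back to the runtime on the default queue...
    cl::enqueueUnmapSVM(data);

    // ...then re-map it (blocking) with the raw-pointer overload before
    // the host reads it again.
    cl::enqueueMapSVM(data.data(), CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
                      data.size() * sizeof(int));

    return std::accumulate(data.begin(), data.end(), 0) == 256 ? 0 : 1;
}
```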
+ */ +template +inline cl_int mapSVM(cl::vector &container) +{ + return enqueueMapSVM(container, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE); +} + +/** +* Blocking SVM map operation - performs a blocking map underneath. +*/ +template +inline cl_int unmapSVM(cl::vector &container) +{ + return enqueueUnmapSVM(container); +} + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + #if CL_HPP_TARGET_OPENCL_VERSION >= 110 inline cl_int enqueueReadBufferRect( const Buffer& buffer, @@ -8785,7 +9382,7 @@ public: } template - cl_int setSVMPointers(T0 t0, T1s... ts) + cl_int setSVMPointers(const T0 &t0, T1s... ts) { return kernel_.setSVMPointers(t0, ts...); } @@ -8908,6 +9505,10 @@ namespace compatibility { #undef __UNLOAD_COMPILER_ERR #undef __CREATE_SUB_DEVICES_ERR + +#undef __CREATE_PIPE_ERR +#undef __GET_PIPE_INFO_ERR + #endif //CL_HPP_USER_OVERRIDE_ERROR_STRINGS // Extensions
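Finally, mapSVM and unmapSVM are blocking conveniences over the enqueue* variants above, mirroring their use in the \mainpage example. A minimal sketch under the same coarse-grained SVM assumption:

```cpp
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include <CL/cl2.hpp>

int main() {
    cl::SVMAllocator<int, cl::SVMTraitCoarse<>> svmAlloc;
    cl::coarse_svm_vector<int> data(64, 7, svmAlloc);

    // Release the container to the runtime (e.g. before launching a kernel
    // that consumes it)...
    cl::unmapSVM(data);

    // ...and perform a blocking map before the host touches it again.
    cl::mapSVM(data);

    return data[0] == 7 ? 0 : 1;
}
```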