// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith, // University of Bristol HPC // // For full license terms please see the LICENSE file distributed with this // source code #include "SYCLStream.h" #include using namespace cl::sycl; // Cache list of devices bool cached = false; std::vector devices; void getDeviceList(void); template SYCLStream::SYCLStream(const unsigned int ARRAY_SIZE, const int device_index) { if (!cached) getDeviceList(); array_size = ARRAY_SIZE; if (device_index >= devices.size()) throw std::runtime_error("Invalid device index"); device dev = devices[device_index]; // Determine sensible dot kernel NDRange configuration if (dev.is_cpu()) { dot_num_groups = dev.get_info(); dot_wgsize = dev.get_info() * 2; } else { dot_num_groups = dev.get_info() * 4; dot_wgsize = dev.get_info(); } // Print out device information std::cout << "Using SYCL device " << getDeviceName(device_index) << std::endl; std::cout << "Driver: " << getDeviceDriver(device_index) << std::endl; std::cout << "Reduction kernel config: " << dot_num_groups << " groups of size " << dot_wgsize << std::endl; queue = new cl::sycl::queue(dev, [&](cl::sycl::exception_list l) { bool error = false; for(auto e: l) { try { std::rethrow_exception(e); } catch (cl::sycl::exception e) { std::cout << e.what(); error = true; } } if(error) { throw std::runtime_error("SYCL errors detected"); } }, {}); // Create buffers d_a = new buffer(array_size); d_b = new buffer(array_size); d_c = new buffer(array_size); d_sum = new buffer(dot_num_groups); } template SYCLStream::~SYCLStream() { delete d_a; delete d_b; delete d_c; delete d_sum; delete queue; devices.clear(); } template void SYCLStream::copy() { queue->submit([&](handler &cgh) { auto ka = d_a->template get_access(cgh); auto kc = d_c->template get_access(cgh); cgh.parallel_for(range<1>{array_size}, [=](id<1> idx) { kc[idx] = ka[idx]; }); }); queue->wait(); } template void SYCLStream::mul() { const T scalar = startScalar; queue->submit([&](handler &cgh) { auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); cgh.parallel_for(range<1>{array_size}, [=](id<1> idx) { kb[idx] = scalar * kc[idx]; }); }); queue->wait(); } template void SYCLStream::add() { queue->submit([&](handler &cgh) { auto ka = d_a->template get_access(cgh); auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); cgh.parallel_for(range<1>{array_size}, [=](id<1> idx) { kc[idx] = ka[idx] + kb[idx]; }); }); queue->wait(); } template void SYCLStream::triad() { const T scalar = startScalar; queue->submit([&](handler &cgh) { auto ka = d_a->template get_access(cgh); auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); cgh.parallel_for(range<1>{array_size}, [=](id<1> idx) { ka[idx] = kb[idx] + scalar * kc[idx]; }); }); queue->wait(); } template T SYCLStream::dot() { queue->submit([&](handler &cgh) { auto ka = d_a->template get_access(cgh); auto kb = d_b->template get_access(cgh); auto ksum = d_sum->template get_access(cgh); auto wg_sum = accessor(range<1>(dot_wgsize), cgh); size_t N = array_size; cgh.parallel_for(nd_range<1>(dot_num_groups*dot_wgsize, dot_wgsize), [=](nd_item<1> item) { size_t i = item.get_global_id(0); size_t li = item.get_local_id(0); size_t global_size = item.get_global_range()[0]; wg_sum[li] = 0.0; for (; i < N; i += global_size) wg_sum[li] += ka[i] * kb[i]; size_t local_size = item.get_local_range()[0]; for (int offset = local_size / 2; offset > 0; offset /= 2) { item.barrier(cl::sycl::access::fence_space::local_space); if (li < offset) wg_sum[li] += wg_sum[li + offset]; } if (li == 0) ksum[item.get_group(0)] = wg_sum[0]; }); }); T sum = 0.0; auto h_sum = d_sum->template get_access(); for (int i = 0; i < dot_num_groups; i++) { sum += h_sum[i]; } return sum; } template void SYCLStream::init_arrays(T initA, T initB, T initC) { queue->submit([&](handler &cgh) { auto ka = d_a->template get_access(cgh); auto kb = d_b->template get_access(cgh); auto kc = d_c->template get_access(cgh); cgh.parallel_for(range<1>{array_size}, [=](item<1> item) { auto id = item.get_id(0); ka[id] = initA; kb[id] = initB; kc[id] = initC; }); }); queue->wait(); } template void SYCLStream::read_arrays(std::vector& a, std::vector& b, std::vector& c) { auto _a = d_a->template get_access(); auto _b = d_b->template get_access(); auto _c = d_c->template get_access(); for (int i = 0; i < array_size; i++) { a[i] = _a[i]; b[i] = _b[i]; c[i] = _c[i]; } } void getDeviceList(void) { // Get list of platforms std::vector platforms = platform::get_platforms(); // Enumerate devices for (unsigned i = 0; i < platforms.size(); i++) { std::vector plat_devices = platforms[i].get_devices(); devices.insert(devices.end(), plat_devices.begin(), plat_devices.end()); } cached = true; } void listDevices(void) { getDeviceList(); // Print device names if (devices.size() == 0) { std::cerr << "No devices found." << std::endl; } else { std::cout << std::endl; std::cout << "Devices:" << std::endl; for (int i = 0; i < devices.size(); i++) { std::cout << i << ": " << getDeviceName(i) << std::endl; } std::cout << std::endl; } } std::string getDeviceName(const int device) { if (!cached) getDeviceList(); std::string name; if (device < devices.size()) { name = devices[device].get_info(); } else { throw std::runtime_error("Error asking for name for non-existant device"); } return name; } std::string getDeviceDriver(const int device) { if (!cached) getDeviceList(); std::string driver; if (device < devices.size()) { driver = devices[device].get_info(); } else { throw std::runtime_error("Error asking for driver for non-existant device"); } return driver; } // TODO: Fix kernel names to allow multiple template specializations template class SYCLStream; template class SYCLStream;