From 2cd52228b733368e8cd57075f444c53f113c1f1a Mon Sep 17 00:00:00 2001
From: Tom Lin <tom91136@gmail.com>
Date: Wed, 22 Dec 2021 12:20:33 +0000
Subject: [PATCH] Revert "Add oneDPL support for std-data and std-indices"

This reverts commit 6ea09a9620674d1bf2626cacda582b7ec6256ee4.
---
 CMakeLists.txt                       |   2 +-
 src/std-data/STDDataStream.cpp       |  41 ++++-------
 src/std-data/STDDataStream.h         |  52 ++------------
 src/std-data/model.cmake             |  50 -------------
 src/std-indices/STDIndicesStream.cpp |  51 +++++---------
 src/std-indices/STDIndicesStream.h   | 102 +++++++++------------------
 src/std-indices/model.cmake          |  50 -------------
 7 files changed, 74 insertions(+), 274 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index edf05f5..3730602 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
 project(BabelStream VERSION 3.5 LANGUAGES CXX)
 
 # uncomment for debugging build issues:
-set(CMAKE_VERBOSE_MAKEFILE ON)
+#set(CMAKE_VERBOSE_MAKEFILE ON)
 
 # some nicer defaults for standard C++
 set(CMAKE_CXX_EXTENSIONS OFF)
diff --git a/src/std-data/STDDataStream.cpp b/src/std-data/STDDataStream.cpp
index 0be23a3..a2628e1 100644
--- a/src/std-data/STDDataStream.cpp
+++ b/src/std-data/STDDataStream.cpp
@@ -5,31 +5,21 @@
 // source code
 
 #include "STDDataStream.h"
-#include <iostream>
+
+#include <algorithm>
+#include <execution>
+#include <numeric>
+
+// There are three execution policies:
+// auto exe_policy = std::execution::seq;
+// auto exe_policy = std::execution::par;
+auto exe_policy = std::execution::par_unseq;
+
 
 template <class T>
-STDDataStream<T>::STDDataStream(const int ARRAY_SIZE, int device) : array_size{ARRAY_SIZE},
-#if defined(ONEDPL_USE_DPCPP_BACKEND)
-    exe_policy(oneapi::dpl::execution::make_device_policy(cl::sycl::default_selector{})),
-    allocator(exe_policy.queue()),
-    a(array_size, allocator), b(array_size, allocator), c(array_size, allocator)
-#else
-    a(array_size), b(array_size),c(array_size)
-#endif
+STDDataStream<T>::STDDataStream(const int ARRAY_SIZE, int device)
+  noexcept : array_size{ARRAY_SIZE}, a(array_size), b(array_size), c(array_size)
 {
-#if USE_ONEDPL
-  std::cout << "Using oneDPL backend: ";
-  #if defined(ONEDPL_USE_DPCPP_BACKEND)
-    std::cout << "SYCL USM (device=" << exe_policy.queue().get_device().get_info<sycl::info::device::name>() << ")";
-  #elif defined(ONEDPL_USE_TBB_BACKEND)
-    std::cout << "TBB";
-  #elif defined(ONEDPL_USE_OPENMP_BACKEND)
-    std::cout << "OpenMP";
-  #else
-    std::cout << "Default";
-  #endif
-  std::cout << std::endl;
-#endif
 }
 
 template <class T>
@@ -43,10 +33,9 @@ void STDDataStream<T>::init_arrays(T initA, T initB, T initC)
 template <class T>
 void STDDataStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
 {
-  // operator = is deleted because h_* vectors may have different allocator type compared to ours
-  std::copy(a.begin(), a.end(), h_a.begin());
-  std::copy(b.begin(), b.end(), h_b.begin());
-  std::copy(c.begin(), c.end(), h_c.begin());
+  h_a = a;
+  h_b = b;
+  h_c = c;
 }
 
 template <class T>
diff --git a/src/std-data/STDDataStream.h b/src/std-data/STDDataStream.h
index c9a1f43..f8bc302 100644
--- a/src/std-data/STDDataStream.h
+++ b/src/std-data/STDDataStream.h
@@ -6,35 +6,12 @@
 
 #pragma once
 
+#include <iostream>
+#include <stdexcept>
 #include "Stream.h"
 
 #define IMPLEMENTATION_STRING "STD (data-oriented)"
 
-#ifdef USE_ONEDPL
-    #define PSTL_USAGE_WARNINGS 1
-
-    #include <oneapi/dpl/execution>
-    #include <oneapi/dpl/iterator>
-    #include <oneapi/dpl/algorithm>
-    #include <oneapi/dpl/memory>
-    #include <oneapi/dpl/numeric>
-
-    #ifdef ONEDPL_USE_DPCPP_BACKEND
-        #include <CL/sycl.hpp>
-    #endif
-#else
-    #include <algorithm>
-    #include <execution>
-    #include <numeric>
-
-    #if defined(ONEDPL_USE_DPCPP_BACKEND) || \
-        defined(ONEDPL_USE_TBB_BACKEND)   || \
-        defined(ONEDPL_USE_OPENMP_BACKEND)
-        #error oneDPL missing (ONEDPL_VERSION_MAJOR not defined) but backend (ONEDPL_USE_*_BACKEND) specified
-    #endif
-
-#endif
-
 
 template <class T>
 class STDDataStream : public Stream<T>
@@ -43,31 +20,14 @@ class STDDataStream : public Stream<T>
     // Size of arrays
     int array_size;
 
-#if defined(ONEDPL_USE_DPCPP_BACKEND)
-    // SYCL oneDPL backend
-    using ExecutionPolicy = oneapi::dpl::execution::device_policy<>;
-    using Allocator = sycl::usm_allocator<T, sycl::usm::alloc::shared>;
-#elif defined(USE_ONEDPL)
-    // every other non-SYCL oneDPL backend (i.e TBB, OMP)
-    using ExecutionPolicy = decltype(oneapi::dpl::execution::par_unseq);
-    using Allocator = std::allocator<T>;
-#else
-    // normal std execution policies
-    using ExecutionPolicy = decltype(std::execution::par_unseq);
-    using Allocator = std::allocator<T>;
-#endif
-
-    ExecutionPolicy exe_policy{};
-    Allocator allocator;
-
     // Device side pointers
-    std::vector<T, Allocator> a;
-    std::vector<T, Allocator> b;
-    std::vector<T, Allocator> c;
+    std::vector<T> a;
+    std::vector<T> b;
+    std::vector<T> c;
 
 
   public:
-    STDDataStream(const int, int);
+    STDDataStream(const int, int) noexcept;
     ~STDDataStream() = default;
 
     virtual void copy() override;
diff --git a/src/std-data/model.cmake b/src/std-data/model.cmake
index 2454295..ef69f30 100644
--- a/src/std-data/model.cmake
+++ b/src/std-data/model.cmake
@@ -19,30 +19,9 @@ register_flag_optional(NVHPC_OFFLOAD
            ccall - Compile for all supported compute capabilities"
         "")
 
-register_flag_optional(ONEDPL_OFFLOAD
-        "Use the DPC++ oneDPL library which supports STL algorithms on SYCL, TBB, and OpenMP.
-         This option only supports the oneDPL library shipped with oneAPI, and must use the dpcpp
-         compiler (i.e -DCMAKE_CXX_COMPILER=dpcpp) for this option.
-         Make sure your oneAPI installation includes at least the following components: dpcpp, onedpl, onetbb.
-         The env. variable `TBBROOT` needs to point to the base directory of your TBB install (e.g /opt/intel/oneapi/tbb/latest/).
-         This should be done by oneAPI's `setvars.sh` script automatically.
-
-         Possible values are:
-           TBB   - Use the TBB backend, the correct TBB library will be linked from oneAPI automatically.
-           OMP   - Use the OpenMP backend
-           DPCPP - Use the SYCL (via dpcpp) backend with the default selector.
-                  See https://intel.github.io/llvm-docs/EnvironmentVariables.html#sycl-device-filter
-                  on selecting a non-default device or SYCL backend."
-        "")
-
-
 macro(setup)
     set(CMAKE_CXX_STANDARD 17)
 
-    if (NVHPC_OFFLOAD AND ONEDPL_OFFLOAD)
-        message(FATAL_ERROR "NVHPC_OFFLOAD and NVHPC_OFFLOAD are mutually exclusive")
-    endif ()
-
     if (NVHPC_OFFLOAD)
         set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD})
         # propagate flags to linker so that it links with the gpu stuff as well
@@ -51,33 +30,4 @@ macro(setup)
     endif ()
 
 
-    if (ONEDPL_OFFLOAD)
-        set(CXX_EXTRA_FLAGS)
-        set(CXX_EXTRA_LIBRARIES /opt/intel/oneapi/tbb/2021.4.0/lib/intel64/gcc4.8/libtbb.so)
-        # propagate flags to linker so that it links with the gpu stuff as well
-        register_append_cxx_flags(ANY -fopenmp -fsycl-unnamed-lambda -fsycl)
-
-        # XXX see https://www.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-library-guide/top/oneapi-dpc-library-onedpl-overview.html
-        # this is to avoid the system TBB headers (if exists) from having precedence which isn't compatible with oneDPL's par implementation
-        register_definitions(
-                PSTL_USE_PARALLEL_POLICIES=0
-                _GLIBCXX_USE_TBB_PAR_BACKEND=0
-        )
-
-        register_definitions(USE_ONEDPL)
-        if (ONEDPL_OFFLOAD STREQUAL "TBB")
-            register_definitions(ONEDPL_USE_TBB_BACKEND=1)
-        elseif (ONEDPL_OFFLOAD STREQUAL "OPENMP")
-            register_definitions(ONEDPL_USE_OPENMP_BACKEND=1)
-        elseif (ONEDPL_OFFLOAD STREQUAL "SYCL")
-            register_definitions(ONEDPL_USE_DPCPP_BACKEND=1)
-        else ()
-            message(FATAL_ERROR "Unsupported ONEDPL_OFFLOAD backend: ${ONEDPL_OFFLOAD}")
-        endif ()
-
-        # even with the workaround above, -ltbb may still end up with the wrong one, so be explicit here
-        register_link_library($ENV{TBBROOT}/lib/intel64/gcc4.8/libtbb.so)
-
-    endif ()
-
 endmacro()
diff --git a/src/std-indices/STDIndicesStream.cpp b/src/std-indices/STDIndicesStream.cpp
index ceb9d3d..a88dd18 100644
--- a/src/std-indices/STDIndicesStream.cpp
+++ b/src/std-indices/STDIndicesStream.cpp
@@ -5,32 +5,21 @@
 // source code
 
 #include "STDIndicesStream.h"
-#include <iostream>
+
+#include <algorithm>
+#include <execution>
+#include <numeric>
+
+// There are three execution policies:
+// auto exe_policy = std::execution::seq;
+// auto exe_policy = std::execution::par;
+auto exe_policy = std::execution::par_unseq;
+
 
 template <class T>
-STDIndicesStream<T>::STDIndicesStream(const int ARRAY_SIZE, int device) :
-array_size{ARRAY_SIZE}, range_start(0), range_end(array_size),
-#if defined(ONEDPL_USE_DPCPP_BACKEND)
-exe_policy(oneapi::dpl::execution::make_device_policy(cl::sycl::default_selector{})),
-    allocator(exe_policy.queue()),
-    a(array_size, allocator), b(array_size, allocator), c(array_size, allocator)
-#else
-a(array_size), b(array_size),c(array_size)
-#endif
+STDIndicesStream<T>::STDIndicesStream(const int ARRAY_SIZE, int device)
+  noexcept : array_size{ARRAY_SIZE}, range(0, array_size), a(array_size), b(array_size), c(array_size) 
 {
-#if USE_ONEDPL
-    std::cout << "Using oneDPL backend: ";
-  #if defined(ONEDPL_USE_DPCPP_BACKEND)
-    std::cout << "SYCL USM (device=" << exe_policy.queue().get_device().get_info<sycl::info::device::name>() << ")";
-  #elif defined(ONEDPL_USE_TBB_BACKEND)
-    std::cout << "TBB";
-  #elif defined(ONEDPL_USE_OPENMP_BACKEND)
-    std::cout << "OpenMP";
-  #else
-    std::cout << "Default";
-  #endif
-  std::cout << std::endl;
-#endif
 }
 
 template <class T>
@@ -44,13 +33,11 @@ void STDIndicesStream<T>::init_arrays(T initA, T initB, T initC)
 template <class T>
 void STDIndicesStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
 {
-  // operator = is deleted because h_* vectors may have different allocator type compared to ours
-  std::copy(a.begin(), a.end(), h_a.begin());
-  std::copy(b.begin(), b.end(), h_b.begin());
-  std::copy(c.begin(), c.end(), h_c.begin());
+  h_a = a;
+  h_b = b;
+  h_c = c;
 }
 
-
 template <class T>
 void STDIndicesStream<T>::copy()
 {
@@ -62,7 +49,7 @@ template <class T>
 void STDIndicesStream<T>::mul()
 {
   //  b[i] = scalar * c[i];
-  std::transform(exe_policy, range_start, range_end, b.begin(), [&, scalar = startScalar](int i) {
+  std::transform(exe_policy, range.begin(), range.end(), b.begin(), [&, scalar = startScalar](int i) {
     return scalar * c[i];
   });
 }
@@ -71,7 +58,7 @@ template <class T>
 void STDIndicesStream<T>::add()
 {
   //  c[i] = a[i] + b[i];
-  std::transform(exe_policy, range_start, range_end, c.begin(), [&](int i) {
+  std::transform(exe_policy, range.begin(), range.end(), c.begin(), [&](int i) {
     return a[i] + b[i];
   });
 }
@@ -80,7 +67,7 @@ template <class T>
 void STDIndicesStream<T>::triad()
 {
   //  a[i] = b[i] + scalar * c[i];
-  std::transform(exe_policy, range_start, range_end, a.begin(), [&, scalar = startScalar](int i) {
+  std::transform(exe_policy, range.begin(), range.end(), a.begin(), [&, scalar = startScalar](int i) {
     return b[i] + scalar * c[i];
   });
 }
@@ -92,7 +79,7 @@ void STDIndicesStream<T>::nstream()
   //  Need to do in two stages with C++11 STL.
   //  1: a[i] += b[i]
   //  2: a[i] += scalar * c[i];
-  std::transform(exe_policy, range_start, range_end, a.begin(), [&, scalar = startScalar](int i) {
+  std::transform(exe_policy, range.begin(), range.end(), a.begin(), [&, scalar = startScalar](int i) {
     return a[i] + b[i] + scalar * c[i];
   });
 }
diff --git a/src/std-indices/STDIndicesStream.h b/src/std-indices/STDIndicesStream.h
index dfb63f6..66c8bb0 100644
--- a/src/std-indices/STDIndicesStream.h
+++ b/src/std-indices/STDIndicesStream.h
@@ -6,62 +6,46 @@
 
 #pragma once
 
+#include <iostream>
+#include <stdexcept>
 #include "Stream.h"
 
 #define IMPLEMENTATION_STRING "STD (index-oriented)"
 
 
-
-#ifdef USE_ONEDPL
-    #define PSTL_USAGE_WARNINGS 1
-
-    #include <oneapi/dpl/execution>
-    #include <oneapi/dpl/iterator>
-    #include <oneapi/dpl/algorithm>
-    #include <oneapi/dpl/memory>
-    #include <oneapi/dpl/numeric>
-
-    #ifdef ONEDPL_USE_DPCPP_BACKEND
-        #include <CL/sycl.hpp>
-    #endif
-#else
-    #include <algorithm>
-    #include <execution>
-    #include <numeric>
-
-    #if defined(ONEDPL_USE_DPCPP_BACKEND) || \
-            defined(ONEDPL_USE_TBB_BACKEND)   || \
-            defined(ONEDPL_USE_OPENMP_BACKEND)
-    #error oneDPL missing (ONEDPL_VERSION_MAJOR not defined) but backend (ONEDPL_USE_*_BACKEND) specified
-    #endif
-
-#endif
-
 // A lightweight counting iterator which will be used by the STL algorithms
 // NB: C++ <= 17 doesn't have this built-in, and it's only added later in ranges-v3 (C++2a) which this
 // implementation doesn't target
 template <typename N>
-class ranged_iterator {
-  N num;
+class ranged {
+  N from, to;
 public:
-  using difference_type = N;
-  using value_type = N;
-  using pointer = const N*;
-  using reference = const N&;
-  using iterator_category = std::random_access_iterator_tag;
-  explicit ranged_iterator(N _num = 0) : num(_num) {}
+  ranged(N from, N to ): from(from), to(to) {}
+    class iterator {
+      N num;
+    public:
+      using difference_type = N;
+      using value_type = N;
+      using pointer = const N*;
+      using reference = const N&;
+      using iterator_category = std::random_access_iterator_tag;
+      explicit iterator(N _num = 0) : num(_num) {}
 
-  ranged_iterator<N>& operator++() { num++; return *this; }
-  ranged_iterator<N> operator++(int) { ranged_iterator<N> retval = *this; ++(*this); return retval; }
-  ranged_iterator<N> operator+(const value_type v) const { return ranged_iterator<N>(num + v); }
+      iterator& operator++() { num++; return *this; }
+      iterator operator++(int) { iterator retval = *this; ++(*this); return retval; }
+      iterator operator+(const value_type v) const { return iterator(num + v); }
 
-  bool operator==(ranged_iterator<N> other) const { return num == other.num; }
-  bool operator!=(ranged_iterator<N> other) const { return *this != other; }
-  bool operator<(ranged_iterator<N> other) const { return num < other.num; }
+      bool operator==(iterator other) const { return num == other.num; }
+      bool operator!=(iterator other) const { return *this != other; }
+      bool operator<(iterator other) const { return num < other.num; }
 
-  reference operator*() const { return num;}
-  difference_type operator-(const ranged_iterator<N> &it) const { return num - it.num; }
-  value_type operator[](const difference_type &i) const { return num + i; }
+      reference operator*() const { return num;}
+      difference_type operator-(const iterator &it) const { return num - it.num; }
+      value_type operator[](const difference_type &i) const { return num + i; }
+
+    };
+    iterator begin() { return iterator(from); }
+    iterator end() { return iterator(to >= from? to+1 : to-1); }
 };
 
 template <class T>
@@ -71,37 +55,17 @@ class STDIndicesStream : public Stream<T>
     // Size of arrays
     int array_size;
 
-#if defined(ONEDPL_USE_DPCPP_BACKEND)
-    // SYCL oneDPL backend
-    using ExecutionPolicy = oneapi::dpl::execution::device_policy<>;
-    using Allocator = sycl::usm_allocator<T, sycl::usm::alloc::shared>;
-    using IteratorType = oneapi::dpl::counting_iterator<int>;
-#elif defined(USE_ONEDPL)
-    // every other non-SYCL oneDPL backend (i.e TBB, OMP)
-    using ExecutionPolicy = decltype(oneapi::dpl::execution::par_unseq);
-    using Allocator = std::allocator<T>;
-    using IteratorType = oneapi::dpl::counting_iterator<int>;
-#else
-    // normal std execution policies
-    using ExecutionPolicy = decltype(std::execution::par_unseq);
-    using Allocator = std::allocator<T>;
-    using IteratorType = ranged_iterator<int>;
-#endif
-
-    IteratorType range_start;
-    IteratorType range_end;
-
-    ExecutionPolicy exe_policy{};
-    Allocator allocator;
+    // induction range
+    ranged<int> range;
 
     // Device side pointers
-    std::vector<T, Allocator> a;
-    std::vector<T, Allocator> b;
-    std::vector<T, Allocator> c;
+    std::vector<T> a;
+    std::vector<T> b;
+    std::vector<T> c;
 
 
   public:
-    STDIndicesStream(const int, int);
+    STDIndicesStream(const int, int) noexcept;
     ~STDIndicesStream() = default;
 
     virtual void copy() override;
diff --git a/src/std-indices/model.cmake b/src/std-indices/model.cmake
index 2454295..ef69f30 100644
--- a/src/std-indices/model.cmake
+++ b/src/std-indices/model.cmake
@@ -19,30 +19,9 @@ register_flag_optional(NVHPC_OFFLOAD
            ccall - Compile for all supported compute capabilities"
         "")
 
-register_flag_optional(ONEDPL_OFFLOAD
-        "Use the DPC++ oneDPL library which supports STL algorithms on SYCL, TBB, and OpenMP.
-         This option only supports the oneDPL library shipped with oneAPI, and must use the dpcpp
-         compiler (i.e -DCMAKE_CXX_COMPILER=dpcpp) for this option.
-         Make sure your oneAPI installation includes at least the following components: dpcpp, onedpl, onetbb.
-         The env. variable `TBBROOT` needs to point to the base directory of your TBB install (e.g /opt/intel/oneapi/tbb/latest/).
-         This should be done by oneAPI's `setvars.sh` script automatically.
-
-         Possible values are:
-           TBB   - Use the TBB backend, the correct TBB library will be linked from oneAPI automatically.
-           OMP   - Use the OpenMP backend
-           DPCPP - Use the SYCL (via dpcpp) backend with the default selector.
-                  See https://intel.github.io/llvm-docs/EnvironmentVariables.html#sycl-device-filter
-                  on selecting a non-default device or SYCL backend."
-        "")
-
-
 macro(setup)
     set(CMAKE_CXX_STANDARD 17)
 
-    if (NVHPC_OFFLOAD AND ONEDPL_OFFLOAD)
-        message(FATAL_ERROR "NVHPC_OFFLOAD and NVHPC_OFFLOAD are mutually exclusive")
-    endif ()
-
     if (NVHPC_OFFLOAD)
         set(NVHPC_FLAGS -stdpar -gpu=${NVHPC_OFFLOAD})
         # propagate flags to linker so that it links with the gpu stuff as well
@@ -51,33 +30,4 @@ macro(setup)
     endif ()
 
 
-    if (ONEDPL_OFFLOAD)
-        set(CXX_EXTRA_FLAGS)
-        set(CXX_EXTRA_LIBRARIES /opt/intel/oneapi/tbb/2021.4.0/lib/intel64/gcc4.8/libtbb.so)
-        # propagate flags to linker so that it links with the gpu stuff as well
-        register_append_cxx_flags(ANY -fopenmp -fsycl-unnamed-lambda -fsycl)
-
-        # XXX see https://www.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-library-guide/top/oneapi-dpc-library-onedpl-overview.html
-        # this is to avoid the system TBB headers (if exists) from having precedence which isn't compatible with oneDPL's par implementation
-        register_definitions(
-                PSTL_USE_PARALLEL_POLICIES=0
-                _GLIBCXX_USE_TBB_PAR_BACKEND=0
-        )
-
-        register_definitions(USE_ONEDPL)
-        if (ONEDPL_OFFLOAD STREQUAL "TBB")
-            register_definitions(ONEDPL_USE_TBB_BACKEND=1)
-        elseif (ONEDPL_OFFLOAD STREQUAL "OPENMP")
-            register_definitions(ONEDPL_USE_OPENMP_BACKEND=1)
-        elseif (ONEDPL_OFFLOAD STREQUAL "SYCL")
-            register_definitions(ONEDPL_USE_DPCPP_BACKEND=1)
-        else ()
-            message(FATAL_ERROR "Unsupported ONEDPL_OFFLOAD backend: ${ONEDPL_OFFLOAD}")
-        endif ()
-
-        # even with the workaround above, -ltbb may still end up with the wrong one, so be explicit here
-        register_link_library($ENV{TBBROOT}/lib/intel64/gcc4.8/libtbb.so)
-
-    endif ()
-
 endmacro()