From fff1b1f84b67ef8c15f0be978324d25d74f188c1 Mon Sep 17 00:00:00 2001
From: Tom Deakin <tom.deakin@bristol.ac.uk>
Date: Mon, 12 Jun 2023 15:49:59 +0100
Subject: [PATCH 01/19] Update CUDAStream.h

---
 src/cuda/CUDAStream.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/cuda/CUDAStream.h b/src/cuda/CUDAStream.h
index 83b8c66..bb3f866 100644
--- a/src/cuda/CUDAStream.h
+++ b/src/cuda/CUDAStream.h
@@ -22,7 +22,7 @@
 #endif
 
 #define TBSIZE 1024
-#define DOT_NUM_BLOCKS 256
+#define DOT_NUM_BLOCKS 1024
 
 template <class T>
 class CUDAStream : public Stream<T>

From c66dcf4d50195de6f4adc9c1cb0297d2a6bfccf8 Mon Sep 17 00:00:00 2001
From: Tom Deakin <tom.deakin@bristol.ac.uk>
Date: Mon, 12 Jun 2023 15:50:52 +0100
Subject: [PATCH 02/19] Revert "Update CUDAStream.h"

This reverts commit fff1b1f84b67ef8c15f0be978324d25d74f188c1.
---
 src/cuda/CUDAStream.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cuda/CUDAStream.h b/src/cuda/CUDAStream.h
index bb3f866..83b8c66 100644
--- a/src/cuda/CUDAStream.h
+++ b/src/cuda/CUDAStream.h
@@ -22,7 +22,7 @@
 #endif
 
 #define TBSIZE 1024
-#define DOT_NUM_BLOCKS 1024
+#define DOT_NUM_BLOCKS 256
 
 template <class T>
 class CUDAStream : public Stream<T>

From 11d2bef837fa8e88e9a04fd8da7f54689b4e9017 Mon Sep 17 00:00:00 2001
From: Kaan Olgu <k.olgu20@gmail.com>
Date: Fri, 16 Jun 2023 19:53:41 +0000
Subject: [PATCH 03/19] OpenMP Instructions for Spack

---
 README.md                  | 30 ++++++++++++++++++++++++++++
 docs/spack_instructions.md | 41 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 docs/spack_instructions.md

diff --git a/README.md b/README.md
index e4c2a15..7e38453 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,7 @@ This code was previously called GPU-STREAM.
 - [How is this different to STREAM?](#how-is-this-different-to-stream)
 - [Building](#building)
     - [CMake](#cmake)
+    - [Spack](#spack)
     - [GNU Make (removed)](#gnu-make)
 - [Results](#results)
 - [Contributing](#contributing)
@@ -138,6 +139,35 @@ Alternatively, refer to the [CI script](./src/ci-test-compile.sh), which test-co
 
 *It is recommended that you delete the `build` directory when you change any of the build flags.*
 
+### Spack
+
+
+The project supports building with Spack >= 0.19.0, which can be installed without root via the [official GitHub repo](https://github.com/spack/spack).
+The BabelStream Spack Package source code could be accessed from the link [here](https://github.com/spack/spack/tree/develop/var/spack/repos/builtin/packages/babelstream/package.py)
+Each BabelStream implementation (programming model) is built as follows:
+
+```shell
+
+# Spack package installation starts with `spack install babelstream` for all programming models
+# The programming model to be built must be specified with the `+` option
+# Model-specific flags must be specified after the model
+$ spack install babelstream@<version>%<compiler> +<model> <model specific flags>
+
+
+# The executables will be generated in:
+# SPACK_INSTALL_DIRECTORY/opt/spack/system-name/compiler-name/babelstream-version-identifier/bin/
+# this path is printed at the end of the installation and can be copied from there
+$ cd SPACK_INSTALL_DIRECTORY/opt/spack/system-name/compiler-name/babelstream-version-identifier/bin/
+$ ./<model>-stream
+```
+More detailed examples are provided in [Spack README file]()
+The `MODEL` option selects one implementation of BabelStream to build.
+
+Currently available models are:
+```
+omp;ocl;std;std20;hip;cuda;kokkos;sycl;sycl2020;acc;raja;tbb;thrust
+```
+
 ### GNU Make
 
 Support for Make has been removed from 4.0 onwards.
diff --git a/docs/spack_instructions.md b/docs/spack_instructions.md
new file mode 100644
index 0000000..b6187b3
--- /dev/null
+++ b/docs/spack_instructions.md
@@ -0,0 +1,41 @@
+# Spack Instructions
+
+## Table of contents
+* [OpenMP](#omp)
+* [OpenCL](#ocl)
+* [STD](#std)
+* [STD20](#std20)
+* [Hip](#hip)
+* [Cuda](#cuda)
+* [Kokkos](#kokkos)
+* [Sycl](#sycl)
+* [Sycl2020](#sycl2020)
+* [ACC](#acc)
+* [Raja](#raja)
+* [Tbb](#tbb)
+* [Thrust](#thrust)
+
+## OpenMP
+
+* There are 3 offloading options for OpenMP: NVIDIA, AMD and Intel. 
+* If a user provides a value for `cuda_arch`, the execution will be automatically offloaded to NVIDIA.
+* If a user provides a value for `amdgpu_target`, the operation will be offloaded to AMD.
+* In the absence of `cuda_arch` and `amdgpu_target`, the execution will be offloaded to Intel.
+
+| Flag        | Definition                      | Options   |
+|-----------| ----------------------------------|-----------|
+| cuda_arch     | List of supported compute capabilities are provided [here](https://github.com/spack/spack/blob/0f271883831bec6da3fc64c92eb1805c39a9f09a/lib/spack/spack/build_systems/cuda.py#LL19C1-L47C6) <br /> Useful [link](https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/) for matching CUDA gencodes with NVIDIA architectures| `cuda_arch=70` |
+|amdgpu_target| List of supported architectures are provided [here](https://github.com/spack/spack/blob/0f271883831bec6da3fc64c92eb1805c39a9f09a/lib/spack/spack/build_systems/rocm.py#LL93C1-L125C19) | 'amdgpu_target=gfx701` |
+
+Example Commandss
+```shell
+# Example 1: for Intel offload
+ $ spack install babelstream%oneapi +omp 
+
+# Example 2: for Nvidia GPU for Volta (sm_70) 
+ $ spack install babelstream +omp cuda_arch=70 
+ 
+# Example 3: for AMD GPU gfx701 
+ $ spack install babelstream +omp amdgpu_target=gfx701 
+```
+

From acf70526c88a20209df323bc471c8c7972d39c9f Mon Sep 17 00:00:00 2001
From: Kaan Olgu <k.olgu20@gmail.com>
Date: Fri, 16 Jun 2023 21:24:06 +0000
Subject: [PATCH 04/19] OpenMP style update

---
 docs/spack_instructions.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/spack_instructions.md b/docs/spack_instructions.md
index b6187b3..e1b0e96 100644
--- a/docs/spack_instructions.md
+++ b/docs/spack_instructions.md
@@ -22,10 +22,10 @@
 * If a user provides a value for `amdgpu_target`, the operation will be offloaded to AMD.
 * In the absence of `cuda_arch` and `amdgpu_target`, the execution will be offloaded to Intel.
 
-| Flag        | Definition                      | Options   |
-|-----------| ----------------------------------|-----------|
-| cuda_arch     | List of supported compute capabilities are provided [here](https://github.com/spack/spack/blob/0f271883831bec6da3fc64c92eb1805c39a9f09a/lib/spack/spack/build_systems/cuda.py#LL19C1-L47C6) <br /> Useful [link](https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/) for matching CUDA gencodes with NVIDIA architectures| `cuda_arch=70` |
-|amdgpu_target| List of supported architectures are provided [here](https://github.com/spack/spack/blob/0f271883831bec6da3fc64c92eb1805c39a9f09a/lib/spack/spack/build_systems/rocm.py#LL93C1-L125C19) | 'amdgpu_target=gfx701` |
+| Flag        | Definition                      | 
+|-----------| ----------------------------------|
+| cuda_arch     |- List of supported compute capabilities are provided [here](https://github.com/spack/spack/blob/0f271883831bec6da3fc64c92eb1805c39a9f09a/lib/spack/spack/build_systems/cuda.py#LL19C1-L47C6) <br />- Useful [link](https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/) for matching CUDA gencodes with NVIDIA architectures| 
+|amdgpu_target| List of supported architectures are provided [here](https://github.com/spack/spack/blob/0f271883831bec6da3fc64c92eb1805c39a9f09a/lib/spack/spack/build_systems/rocm.py#LL93C1-L125C19) | 
 
 Example Commandss
 ```shell

From 22e9339a6ed9900278a3ad21e0da422e4be07a30 Mon Sep 17 00:00:00 2001
From: Kaan Olgu <k.olgu20@gmail.com>
Date: Fri, 16 Jun 2023 22:03:55 +0000
Subject: [PATCH 05/19] OpenCL Instructions for Spack

---
 docs/spack_instructions.md | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/docs/spack_instructions.md b/docs/spack_instructions.md
index e1b0e96..759f276 100644
--- a/docs/spack_instructions.md
+++ b/docs/spack_instructions.md
@@ -27,7 +27,7 @@
 | cuda_arch     |- List of supported compute capabilities are provided [here](https://github.com/spack/spack/blob/0f271883831bec6da3fc64c92eb1805c39a9f09a/lib/spack/spack/build_systems/cuda.py#LL19C1-L47C6) <br />- Useful [link](https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/) for matching CUDA gencodes with NVIDIA architectures| 
 |amdgpu_target| List of supported architectures are provided [here](https://github.com/spack/spack/blob/0f271883831bec6da3fc64c92eb1805c39a9f09a/lib/spack/spack/build_systems/rocm.py#LL93C1-L125C19) | 
 
-Example Commandss
+
 ```shell
 # Example 1: for Intel offload
  $ spack install babelstream%oneapi +omp 
@@ -39,3 +39,28 @@ Example Commandss
  $ spack install babelstream +omp amdgpu_target=gfx701 
 ```
 
+
+## OpenCL
+
+* There are 4 different backend options for OpenCL : AMD,CUDA,INTEL, POCL 
+* No need to specify `amdgpu_target` or `cuda_arch` here since we are using AMD and CUDA as backend respectively.
+
+
+| Flag        | Definition                      | 
+|-----------| ----------------------------------|
+| backend     | 4 different backend options: <br />- cuda <br />- amd <br />- intel <br />- pocl | 
+
+
+```shell
+# Example 1:  CUDA backend
+ $ spack install babelstream%gcc +ocl backend=cuda
+
+# Example 2:  AMD backend 
+ $ spack install babelstream%gcc +ocl backend=amd
+ 
+# Example 3:  Intel backend
+ $ spack install babelstream%gcc +ocl backend=intel
+
+# Example 4:  POCL backend
+ $ spack install babelstream%gcc +ocl backend=pocl
+```
\ No newline at end of file

From 7715c0843b874d34761a60a63252ced816c76b34 Mon Sep 17 00:00:00 2001
From: Kaan Olgu <k.olgu20@gmail.com>
Date: Fri, 16 Jun 2023 22:20:26 +0000
Subject: [PATCH 06/19] STD Instructions for Spack

---
 docs/spack_instructions.md | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/docs/spack_instructions.md b/docs/spack_instructions.md
index 759f276..bbcdc4c 100644
--- a/docs/spack_instructions.md
+++ b/docs/spack_instructions.md
@@ -42,7 +42,6 @@
 
 ## OpenCL
 
-* There are 4 different backend options for OpenCL : AMD,CUDA,INTEL, POCL 
 * No need to specify `amdgpu_target` or `cuda_arch` here since we are using AMD and CUDA as backend respectively.
 
 
@@ -63,4 +62,20 @@
 
 # Example 4:  POCL backend
  $ spack install babelstream%gcc +ocl backend=pocl
+```
+
+## STD
+* Minimum GCC version requirement `10.1.0`
+* NVHPC offload will be added in a future release
+
+```shell
+# Example 1:  data 
+ $ spack install babelstream +stddata
+
+# Example 2:  ranges
+ $ spack install babelstream +stdranges
+ 
+# Example 3:  indices
+ $ spack install babelstream +stdindices
+
 ```
\ No newline at end of file

From 406cc0010edb30b27f1111adf6eda292edcd8bcf Mon Sep 17 00:00:00 2001
From: Kaan Olgu <k.olgu20@gmail.com>
Date: Sat, 17 Jun 2023 09:02:13 +0000
Subject: [PATCH 07/19] HIP Instructions for Spack

---
 docs/spack_instructions.md | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/docs/spack_instructions.md b/docs/spack_instructions.md
index bbcdc4c..fa9f0fa 100644
--- a/docs/spack_instructions.md
+++ b/docs/spack_instructions.md
@@ -4,8 +4,7 @@
 * [OpenMP](#omp)
 * [OpenCL](#ocl)
 * [STD](#std)
-* [STD20](#std20)
-* [Hip](#hip)
+* [Hip(ROCM)](#hiprocm)
 * [Cuda](#cuda)
 * [Kokkos](#kokkos)
 * [Sycl](#sycl)
@@ -78,4 +77,30 @@
 # Example 3:  indices
  $ spack install babelstream +stdindices
 
+```
+
+## HIP(ROCM)
+
+*  `amdgpu_target` and `flags` are optional here.
+
+
+| Flag        | Definition                      | 
+|-----------| ----------------------------------|
+|amdgpu_target| List of supported architectures are provided [here](https://github.com/spack/spack/blob/0f271883831bec6da3fc64c92eb1805c39a9f09a/lib/spack/spack/build_systems/rocm.py#LL93C1-L125C19) | 
+|flags | Extra flags to pass |
+
+
+
+```shell
+# Example 1:  ROCM default
+ $ spack install babelstream +rocm
+
+# Example 2:  ROCM with GPU target
+ $ spack install babelstream +rocm amdgpu_target=<gfx701>
+ 
+# Example 3:  ROCM with extra flags option
+ $ spack install babelstream +rocm flags=<xxx>
+
+# Example 4:  ROCM with GPU target and extra flags
+ $ spack install babelstream +rocm amdgpu_target=<gfx701> flags=<xxx>
 ```
\ No newline at end of file

From 9b131722355c02dba91436d386f802b2fbf9e65a Mon Sep 17 00:00:00 2001
From: Kaan Olgu <k.olgu20@gmail.com>
Date: Sat, 17 Jun 2023 09:17:32 +0000
Subject: [PATCH 08/19] CUDA Instructions for Spack

---
 docs/spack_instructions.md | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/docs/spack_instructions.md b/docs/spack_instructions.md
index fa9f0fa..2309211 100644
--- a/docs/spack_instructions.md
+++ b/docs/spack_instructions.md
@@ -1,5 +1,6 @@
 # Spack Instructions
 
+
 ## Table of contents
 * [OpenMP](#omp)
 * [OpenCL](#ocl)
@@ -103,4 +104,29 @@
 
 # Example 4:  ROCM with GPU target and extra flags
  $ spack install babelstream +rocm amdgpu_target=<gfx701> flags=<xxx>
+```
+
+## CUDA
+
+* The `cuda_arch` value is mandatory here. 
+* If a user provides a value for `mem`, device memory mode will be chosen accordingly
+* If a user provides a value for `flags`, additional CUDA flags will be passed to NVCC
+* In the absence of `mem` and `flags`, the execution will choose **DEFAULT** for device memory mode and no additional flags will be passed
+
+
+| Flag        | Definition                      | 
+|-----------| ----------------------------------|
+| cuda_arch     |- List of supported compute capabilities are provided [here](https://github.com/spack/spack/blob/0f271883831bec6da3fc64c92eb1805c39a9f09a/lib/spack/spack/build_systems/cuda.py#LL19C1-L47C6) <br />- Useful [link](https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/) for matching CUDA gencodes with NVIDIA architectures| 
+|mem| Device memory mode: <br />- **DEFAULT** allocate host and device memory pointers.<br />- **MANAGED** use CUDA Managed Memory.<br />- **PAGEFAULT** shared memory, only host pointers allocated | 
+|flags | Extra flags to pass |
+
+```shell
+# Example 1: CUDA no mem and flags specified
+ $ spack install babelstream +cuda cuda_arch=70
+
+# Example 2: for Nvidia GPU for Volta (sm_70) 
+ $ spack install babelstream +cuda cuda_arch=70 mem=managed
+ 
+# Example 3: CUDA with mem and flags specified
+ $ spack install babelstream +cuda cuda_arch=70 mem=managed flags=xxx 
 ```
\ No newline at end of file

From c3dee4b64d42cc14cbaffa14fa7e599f2d308df8 Mon Sep 17 00:00:00 2001
From: Kaan Olgu <k.olgu20@gmail.com>
Date: Sat, 17 Jun 2023 10:08:01 +0000
Subject: [PATCH 09/19] Kokkos Instructions for Spack

---
 docs/spack_instructions.md | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/docs/spack_instructions.md b/docs/spack_instructions.md
index 2309211..93a2aff 100644
--- a/docs/spack_instructions.md
+++ b/docs/spack_instructions.md
@@ -122,11 +122,35 @@
 
 ```shell
 # Example 1: CUDA no mem and flags specified
- $ spack install babelstream +cuda cuda_arch=70
+ $ spack install babelstream +cuda cuda_arch=<70>
 
 # Example 2: for Nvidia GPU for Volta (sm_70) 
- $ spack install babelstream +cuda cuda_arch=70 mem=managed
+ $ spack install babelstream +cuda cuda_arch=<70> mem=<managed>
  
 # Example 3: CUDA with mem and flags specified
- $ spack install babelstream +cuda cuda_arch=70 mem=managed flags=xxx 
+ $ spack install babelstream +cuda cuda_arch=<70> mem=<managed> flags=<CUDA_EXTRA_FLAGS> 
+```
+
+## Kokkos
+
+* The Kokkos implementation requires the Kokkos source folder to be provided because Kokkos is built from scratch
+
+
+| Flag        | Definition                      | 
+|-----------| ----------------------------------|
+| dir | Download a Kokkos release from the [GitHub repository](https://github.com/kokkos/kokkos), extract the archive to a directory of your choice, and point the `dir` flag at that directory |
+| backend     | 2 different backend options: <br />- cuda <br />- omp | 
+| cuda_arch     |- List of supported compute capabilities are provided [here](https://github.com/spack/spack/blob/0f271883831bec6da3fc64c92eb1805c39a9f09a/lib/spack/spack/build_systems/cuda.py#LL19C1-L47C6) <br />- Useful [link](https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/) for matching CUDA gencodes with NVIDIA architectures| 
+
+
+```shell
+# Example 1:  No Backend option specified
+ $ spack install babelstream +kokkos dir=</home/user/Downloads/kokkos-x.x.xx>
+
+# Example 2:  CUDA backend 
+ $ spack install babelstream +kokkos backend=cuda cuda_arch=70 dir=</home/user/Downloads/kokkos-x.x.xx>
+ 
+# Example 3:  OMP backend
+ $ spack install babelstream +kokkos  backend=omp dir=</home/user/Downloads/kokkos-x.x.xx>
+
 ```
\ No newline at end of file

From 2f4e6a587342cea0e7724708cb1144f725e70ae7 Mon Sep 17 00:00:00 2001
From: Kaan Olgu <k.olgu20@gmail.com>
Date: Sat, 17 Jun 2023 11:29:18 +0000
Subject: [PATCH 10/19] SYCL/SYCL2020 Instructions for Spack

---
 docs/spack_instructions.md | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/docs/spack_instructions.md b/docs/spack_instructions.md
index 93a2aff..9bd6d7f 100644
--- a/docs/spack_instructions.md
+++ b/docs/spack_instructions.md
@@ -153,4 +153,31 @@
 # Example 3:  OMP backend
  $ spack install babelstream +kokkos  backend=omp dir=</home/user/Downloads/kokkos-x.x.xx>
 
-```
\ No newline at end of file
+```
+
+
+## SYCL2020
+* Instructions for installing the Intel compilers are provided [here](https://spack.readthedocs.io/en/latest/build_systems/inteloneapipackage.html#building-a-package-with-icx)
+
+| Flag        | Definition                      | 
+|-----------| ----------------------------------|
+| implementation     | 3 different implementation options: <br />- OneAPI-ICPX <br />- OneAPI-DPCPP <br />- Compute-CPP <br />| 
+
+```shell
+# Example 1:  No implementation option specified (build for OneAPI-ICPX)
+ $ spack install babelstream%oneapi +sycl2020
+
+# Example 2:  OneAPI-DPCPP implementation 
+ $ spack install babelstream +sycl2020 implementation=ONEAPI-DPCPP
+```
+
+## SYCL
+
+| Flag        | Definition                      | 
+|-----------| ----------------------------------|
+| implementation     | 2 different implementation options: <br />- OneAPI-DPCPP <br />- Compute-CPP <br />| 
+
+```shell
+# Example 1:  OneAPI-DPCPP implementation 
+ $ spack install babelstream +sycl2020 implementation=ONEAPI-DPCPP
+```

From 178763fd528ac35f5352d143248bbfa055e9002e Mon Sep 17 00:00:00 2001
From: Kaan Olgu <k.olgu20@gmail.com>
Date: Sat, 17 Jun 2023 11:34:24 +0000
Subject: [PATCH 11/19] ACC Instructions for Spack

---
 docs/spack_instructions.md | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/docs/spack_instructions.md b/docs/spack_instructions.md
index 9bd6d7f..1424a25 100644
--- a/docs/spack_instructions.md
+++ b/docs/spack_instructions.md
@@ -181,3 +181,19 @@
 # Example 1:  OneAPI-DPCPP implementation 
  $ spack install babelstream +sycl2020 implementation=ONEAPI-DPCPP
 ```
+## ACC
+* Target device selection process is automatic with 2 options:
+    * **gpu** : Globally set the target device to an NVIDIA GPU automatically if `cuda_arch` is specified 
+    * **multicore** : Globally set the target device to the host CPU automatically if `cpu_arch` is specified 
+
+| Flag        | Definition                      | 
+|-----------| ----------------------------------|
+| cuda_arch     |- List of supported compute capabilities are provided [here](https://github.com/spack/spack/blob/0f271883831bec6da3fc64c92eb1805c39a9f09a/lib/spack/spack/build_systems/cuda.py#LL19C1-L47C6) <br />- Useful [link](https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/) for matching CUDA gencodes with NVIDIA architectures| 
+| CPU_ARCH   | This sets the `-tp` (target processor) flag, possible values are: <br /> `px`          - Generic x86 Processor <br /> `bulldozer`   - AMD Bulldozer processor <br /> `piledriver`  - AMD Piledriver processor <br /> `zen`         - AMD Zen architecture (Epyc, Ryzen) <br /> `zen2`        - AMD Zen 2 architecture (Ryzen 2) <br />  `sandybridge` - Intel SandyBridge processor <br /> `haswell`     - Intel Haswell processor <br /> `knl`         - Intel Knights Landing processor <br /> `skylake`     - Intel Skylake Xeon processor <br /> `host`        - Link native version of HPC SDK cpu math library <br /> `native`      - Alias for -tp host | `cpu_arch=skylake` |
+```shell
+# Example 1:  For GPU Run 
+ $ spack install babelstream +acc cuda_arch=<70>
+
+# Example 2:  For Multicore CPU Run 
+ $ spack install babelstream +acc cpu_arch=<bulldozer>
+```
\ No newline at end of file

From 61f23698e5d56c1e77d6695c9c86e6a0d978a10f Mon Sep 17 00:00:00 2001
From: Kaan Olgu <k.olgu20@gmail.com>
Date: Sat, 17 Jun 2023 11:37:52 +0000
Subject: [PATCH 12/19] RAJA Instructions for Spack

---
 docs/spack_instructions.md | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/docs/spack_instructions.md b/docs/spack_instructions.md
index 1424a25..a9e10b7 100644
--- a/docs/spack_instructions.md
+++ b/docs/spack_instructions.md
@@ -190,10 +190,25 @@
 |-----------| ----------------------------------|
 | cuda_arch     |- List of supported compute capabilities are provided [here](https://github.com/spack/spack/blob/0f271883831bec6da3fc64c92eb1805c39a9f09a/lib/spack/spack/build_systems/cuda.py#LL19C1-L47C6) <br />- Useful [link](https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/) for matching CUDA gencodes with NVIDIA architectures| 
 | CPU_ARCH   | This sets the `-tp` (target processor) flag, possible values are: <br /> `px`          - Generic x86 Processor <br /> `bulldozer`   - AMD Bulldozer processor <br /> `piledriver`  - AMD Piledriver processor <br /> `zen`         - AMD Zen architecture (Epyc, Ryzen) <br /> `zen2`        - AMD Zen 2 architecture (Ryzen 2) <br />  `sandybridge` - Intel SandyBridge processor <br /> `haswell`     - Intel Haswell processor <br /> `knl`         - Intel Knights Landing processor <br /> `skylake`     - Intel Skylake Xeon processor <br /> `host`        - Link native version of HPC SDK cpu math library <br /> `native`      - Alias for -tp host | `cpu_arch=skylake` |
+
 ```shell
 # Example 1:  For GPU Run 
  $ spack install babelstream +acc cuda_arch=<70>
 
 # Example 2:  For Multicore CPU Run 
  $ spack install babelstream +acc cpu_arch=<bulldozer>
+```
+
+## RAJA
+* The RAJA implementation requires the RAJA source folder to be provided because RAJA is built from scratch
+
+
+| Flag        | Definition                      | 
+|-----------| ----------------------------------|
+| dir | Download the Raja release from github repository and extract the zip file to a directory you want and target this directory with `dir` flag |
+| backend     | 2 different backend options: <br />- cuda <br />- omp | 
+|offload| Choose offloading platform `offload= [cpu]/[nvidia]` |
+```shell
+# Example 1:  For CPU offload with backend OMP 
+ $ spack install babelstream +raja offload=cpu backend=omp dir=/home/dir/raja
 ```
\ No newline at end of file

From 0040130b67ba611ae259d51d3776fd072b77bfb8 Mon Sep 17 00:00:00 2001
From: Kaan Olgu <k.olgu20@gmail.com>
Date: Sat, 17 Jun 2023 11:38:50 +0000
Subject: [PATCH 13/19] TBB Instructions for Spack

---
 docs/spack_instructions.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/docs/spack_instructions.md b/docs/spack_instructions.md
index a9e10b7..2f995c9 100644
--- a/docs/spack_instructions.md
+++ b/docs/spack_instructions.md
@@ -208,7 +208,14 @@
 | dir | Download the Raja release from github repository and extract the zip file to a directory you want and target this directory with `dir` flag |
 | backend     | 2 different backend options: <br />- cuda <br />- omp | 
 |offload| Choose offloading platform `offload= [cpu]/[nvidia]` |
+
 ```shell
 # Example 1:  For CPU offload with backend OMP 
  $ spack install babelstream +raja offload=cpu backend=omp dir=/home/dir/raja
+```
+
+## TBB
+```shell
+# Example: 
+ $ spack install babelstream +tbb
 ```
\ No newline at end of file

From 217249ff38e114e252749a75396c3e10fc0aa2e9 Mon Sep 17 00:00:00 2001
From: Kaan Olgu <k.olgu20@gmail.com>
Date: Sat, 17 Jun 2023 12:05:16 +0000
Subject: [PATCH 14/19] THRUST Instructions for Spack

---
 docs/spack_instructions.md | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/docs/spack_instructions.md b/docs/spack_instructions.md
index 2f995c9..708deed 100644
--- a/docs/spack_instructions.md
+++ b/docs/spack_instructions.md
@@ -218,4 +218,22 @@
 ```shell
 # Example: 
  $ spack install babelstream +tbb
-```
\ No newline at end of file
+```
+
+## THRUST
+
+| Flag        | Definition                      | 
+|-----------| ----------------------------------|
+|implementation| Choose one of the implementations for Thrust. Options are `cuda` and `rocm` (`implementation=[cuda]/[rocm]`) |
+|backend| CUDA's Thrust implementation supports the following backends: <br />- CUDA <br />- OMP <br />- TBB |
+| cuda_arch     |- List of supported compute capabilities are provided [here](https://github.com/spack/spack/blob/0f271883831bec6da3fc64c92eb1805c39a9f09a/lib/spack/spack/build_systems/cuda.py#LL19C1-L47C6) <br />- Useful [link](https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/) for matching CUDA gencodes with NVIDIA architectures| 
+|flags | Additional CUDA flags passed to nvcc, this is appended after `CUDA_ARCH` | 
+
+```shell
+# Example 1: CUDA implementation
+$ spack install babelstream +thrust implementation=cuda backend=cuda cuda_arch=<70> flags=<option>
+$ spack install babelstream +thrust implementation=cuda backend=omp cuda_arch=<70> flags=<option>
+$ spack install babelstream +thrust implementation=cuda backend=tbb cuda_arch=<70> flags=<option>
+# Example 2: ROCM implementation
+$ spack install babelstream +thrust implementation=rocm backend=<option>
+```

From e8c8edee170a27616dc39868099d7895aa51ad99 Mon Sep 17 00:00:00 2001
From: Kaan Olgu <k.olgu20@gmail.com>
Date: Sat, 17 Jun 2023 12:06:53 +0000
Subject: [PATCH 15/19] Fix link in README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 7e38453..ad7873b 100644
--- a/README.md
+++ b/README.md
@@ -160,7 +160,7 @@ $ spack install babelstream@<version>%<compiler> +<model> <model specific flags>
 $ cd SPACK_INSTALL_DIRECTORY/opt/spack/system-name/compiler-name/babelstream-version-identifier/bin/
 $ ./<model>-stream
 ```
-More detailed examples are provided in [Spack README file]()
+More detailed examples are provided in [Spack README file](./docs/spack_instructions.md)
 The `MODEL` option selects one implementation of BabelStream to build.
 
 Currently available models are:

From a9f1b7c4cb801b39fdb051397af88257e426052f Mon Sep 17 00:00:00 2001
From: Tom Deakin <thomasdeakin@gmail.com>
Date: Tue, 20 Jun 2023 15:58:23 +0100
Subject: [PATCH 16/19] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ad7873b..5e35025 100644
--- a/README.md
+++ b/README.md
@@ -143,7 +143,7 @@ Alternatively, refer to the [CI script](./src/ci-test-compile.sh), which test-co
 
 
 The project supports building with Spack >= 0.19.0, which can be installed without root via the [official GitHub repo](https://github.com/spack/spack).
-The BabelStream Spack Package source code could be accessed from the link [here](https://github.com/spack/spack/tree/develop/var/spack/repos/builtin/packages/babelstream/package.py)
+The source code of the BabelStream Spack package is available [here](https://github.com/spack/spack/tree/develop/var/spack/repos/builtin/packages/babelstream/package.py).
 Each BabelStream implementation (programming model) is built as follows:
 
 ```shell

From 75b487bfdba8089d73e246ab4c6692ecfff22877 Mon Sep 17 00:00:00 2001
From: Tom Deakin <thomasdeakin@gmail.com>
Date: Tue, 20 Jun 2023 15:58:29 +0100
Subject: [PATCH 17/19] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5e35025..f25c5c1 100644
--- a/README.md
+++ b/README.md
@@ -160,7 +160,7 @@ $ spack install babelstream@<version>%<compiler> +<model> <model specific flags>
 $ cd SPACK_INSTALL_DIRECTORY/opt/spack/system-name/compiler-name/babelstream-version-identifier/bin/
 $ ./<model>-stream
 ```
-More detailed examples are provided in [Spack README file](./docs/spack_instructions.md)
+More detailed examples are provided in [Spack README file](./docs/spack_instructions.md).
 The `MODEL` option selects one implementation of BabelStream to build.
 
 Currently available models are:

From 35d3cc8552e3435af908505f5f18705db9709cc7 Mon Sep 17 00:00:00 2001
From: Tom Deakin <thomasdeakin@gmail.com>
Date: Tue, 20 Jun 2023 15:58:34 +0100
Subject: [PATCH 18/19] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f25c5c1..7df3397 100644
--- a/README.md
+++ b/README.md
@@ -161,7 +161,7 @@ $ cd SPACK_INSTALL_DIRECTORY/opt/spack/system-name/compiler-name/babelstream-ver
 $ ./<model>-stream
 ```
 More detailed examples are provided in [Spack README file](./docs/spack_instructions.md).
-The `MODEL` option selects one implementation of BabelStream to build.
+The `MODEL` variant selects one implementation of BabelStream to build.
 
 Currently available models are:
 ```

From 1d43fcb3e7d2f4d43b3d985194de0a6b438069b8 Mon Sep 17 00:00:00 2001
From: Aksel Alpay <aksel.alpay@uni-heidelberg.de>
Date: Thu, 6 Jul 2023 22:38:50 +0200
Subject: [PATCH 19/19] std-indices: Fix infinite recursion in
 ranged::operator!=

---
 src/std-indices/STDIndicesStream.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/std-indices/STDIndicesStream.h b/src/std-indices/STDIndicesStream.h
index bc068aa..c2eec0e 100644
--- a/src/std-indices/STDIndicesStream.h
+++ b/src/std-indices/STDIndicesStream.h
@@ -36,7 +36,7 @@ public:
       iterator operator+(const value_type v) const { return iterator(num + v); }
 
       bool operator==(iterator other) const { return num == other.num; }
-      bool operator!=(iterator other) const { return *this != other; }
+      bool operator!=(iterator other) const { return num != other.num; }
       bool operator<(iterator other) const { return num < other.num; }
 
       reference operator*() const { return num;}