diff --git a/CMakeLists.txt b/CMakeLists.txt
index 91b4f5d..f80d762 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,16 @@

 cmake_minimum_required(VERSION 3.2)

+if(NOT DEFINED HIP_PATH)
+    if(NOT DEFINED ENV{HIP_PATH})
+        set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed")
+    else()
+        set(HIP_PATH "$ENV{HIP_PATH}" CACHE PATH "Path to which HIP has been installed")
+    endif()
+endif()
+set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
+
+
 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)

@@ -27,6 +37,18 @@ if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Cray")
   list(APPEND CMAKE_CXX_FLAGS -hstd=c++11)
 endif ()

+#-------------------------------------------------------------------------------
+# HIP
+#-------------------------------------------------------------------------------
+find_package(HIP QUIET)
+if(HIP_FOUND)
+  list(APPEND HIP_HIPCC_FLAGS --std=c++11)
+  hip_add_executable(gpu-stream-hip main.cpp HIPStream.cu)
+  target_compile_definitions(gpu-stream-hip PUBLIC HIP)
+else()
+  message("Skipping HIP...")
+endif()
+
 #-------------------------------------------------------------------------------
 # CUDA
 #-------------------------------------------------------------------------------
diff --git a/HIPStream.cu b/HIPStream.cu
new file mode 100644
index 0000000..ec34955
--- /dev/null
+++ b/HIPStream.cu
@@ -0,0 +1,236 @@
+// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith,
+// University of Bristol HPC
+//
+// For full license terms please see the LICENSE file distributed with this
+// source code
+
+
+#include "HIPStream.h"
+#include "hip/hip_runtime.h"
+
+// Threads per block; array sizes must be a multiple of this value
+#define TBSIZE 1024
+
+// Print the most recent HIP runtime error, if any, and exit with its code
+void check_error(void)
+{
+  hipError_t err = hipGetLastError();
+  if (err != hipSuccess)
+  {
+    std::cerr << "Error: " << hipGetErrorString(err) << std::endl;
+    exit(err);
+  }
+}
+
+// Select the requested device, report it, and allocate the three device arrays.
+// Throws std::runtime_error on a bad array size, a bad device index, or when
+// the selected device cannot hold all three buffers.
+template <class T>
+HIPStream<T>::HIPStream(const unsigned int ARRAY_SIZE, const int device_index)
+{
+
+  // The array size must be divisible by TBSIZE for kernel launches
+  if (ARRAY_SIZE % TBSIZE != 0)
+  {
+    std::stringstream ss;
+    ss << "Array size must be a multiple of " << TBSIZE;
+    throw std::runtime_error(ss.str());
+  }
+
+  // Set device (negative indices are rejected as well as out-of-range ones)
+  int count;
+  hipGetDeviceCount(&count);
+  check_error();
+  if (device_index < 0 || device_index >= count)
+    throw std::runtime_error("Invalid device index");
+  hipSetDevice(device_index);
+  check_error();
+
+  // Print out device information
+  std::cout << "Using HIP device " << getDeviceName(device_index) << std::endl;
+  std::cout << "Driver: " << getDeviceDriver(device_index) << std::endl;
+
+  array_size = ARRAY_SIZE;
+
+  // Check buffers fit on the selected device (not device 0); sizeof(T) comes
+  // first so the product is computed in size_t rather than unsigned int
+  hipDeviceProp_t props;
+  hipGetDeviceProperties(&props, device_index);
+  check_error();
+  if (props.totalGlobalMem < sizeof(T)*ARRAY_SIZE*3)
+    throw std::runtime_error("Device does not have enough memory for all 3 buffers");
+
+  // Create device buffers
+  hipMalloc(&d_a, ARRAY_SIZE*sizeof(T));
+  check_error();
+  hipMalloc(&d_b, ARRAY_SIZE*sizeof(T));
+  check_error();
+  hipMalloc(&d_c, ARRAY_SIZE*sizeof(T));
+  check_error();
+}
+
+
+// Release the three device arrays
+template <class T>
+HIPStream<T>::~HIPStream()
+{
+  hipFree(d_a);
+  check_error();
+  hipFree(d_b);
+  check_error();
+  hipFree(d_c);
+  check_error();
+}
+
+// Copy the host vectors a, b and c into the device arrays d_a, d_b and d_c
+template <class T>
+void HIPStream<T>::write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c)
+{
+  // Copy host memory to device
+  hipMemcpy(d_a, a.data(), a.size()*sizeof(T), hipMemcpyHostToDevice);
+  check_error();
+  hipMemcpy(d_b, b.data(), b.size()*sizeof(T), hipMemcpyHostToDevice);
+  check_error();
+  hipMemcpy(d_c, c.data(), c.size()*sizeof(T), hipMemcpyHostToDevice);
+  check_error();
+}
+
+// Copy the device arrays d_a, d_b and d_c back into the host vectors a, b and c
+template <class T>
+void HIPStream<T>::read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c)
+{
+  // Copy device memory to host
+  hipMemcpy(a.data(), d_a, a.size()*sizeof(T), hipMemcpyDeviceToHost);
+  check_error();
+  hipMemcpy(b.data(), d_b, b.size()*sizeof(T), hipMemcpyDeviceToHost);
+  check_error();
+  hipMemcpy(c.data(), d_c, c.size()*sizeof(T), hipMemcpyDeviceToHost);
+  check_error();
+}
+
+
+// c[i] = a[i] for the element at this thread's global index
+template <class T>
+__global__ void copy_kernel(hipLaunchParm lp, const T * a, T * c)
+{
+  const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+  c[i] = a[i];
+}
+
+// Launch copy_kernel with array_size/TBSIZE blocks of TBSIZE threads, then synchronize
+template <class T>
+void HIPStream<T>::copy()
+{
+  hipLaunchKernel(HIP_KERNEL_NAME(copy_kernel<T>), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_c);
+  check_error();
+  hipDeviceSynchronize();
+  check_error();
+}
+
+// b[i] = scalar * c[i] for the element at this thread's global index
+template <class T>
+__global__ void mul_kernel(hipLaunchParm lp, T * b, const T * c)
+{
+  const T scalar = 0.3;
+  const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+  b[i] = scalar * c[i];
+}
+
+// Launch mul_kernel with array_size/TBSIZE blocks of TBSIZE threads, then synchronize
+template <class T>
+void HIPStream<T>::mul()
+{
+  hipLaunchKernel(HIP_KERNEL_NAME(mul_kernel<T>), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_b, d_c);
+  check_error();
+  hipDeviceSynchronize();
+  check_error();
+}
+
+// c[i] = a[i] + b[i] for the element at this thread's global index
+template <class T>
+__global__ void add_kernel(hipLaunchParm lp, const T * a, const T * b, T * c)
+{
+  const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+  c[i] = a[i] + b[i];
+}
+
+// Launch add_kernel with array_size/TBSIZE blocks of TBSIZE threads, then synchronize
+template <class T>
+void HIPStream<T>::add()
+{
+  hipLaunchKernel(HIP_KERNEL_NAME(add_kernel<T>), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_b, d_c);
+  check_error();
+  hipDeviceSynchronize();
+  check_error();
+}
+
+// a[i] = b[i] + scalar * c[i] for the element at this thread's global index
+template <class T>
+__global__ void triad_kernel(hipLaunchParm lp, T * a, const T * b, const T * c)
+{
+  const T scalar = 0.3;
+  const int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+  a[i] = b[i] + scalar * c[i];
+}
+
+// Launch triad_kernel with array_size/TBSIZE blocks of TBSIZE threads, then synchronize
+template <class T>
+void HIPStream<T>::triad()
+{
+  hipLaunchKernel(HIP_KERNEL_NAME(triad_kernel<T>), dim3(array_size/TBSIZE), dim3(TBSIZE), 0, 0, d_a, d_b, d_c);
+  check_error();
+  hipDeviceSynchronize();
+  check_error();
+}
+
+
+// Print the index and name of every HIP device, or a notice when there are none
+void listDevices(void)
+{
+  // Get number of devices
+  int count;
+  hipGetDeviceCount(&count);
+  check_error();
+
+  // Print device names
+  if (count == 0)
+  {
+    std::cerr << "No devices found." << std::endl;
+  }
+  else
+  {
+    std::cout << std::endl;
+    std::cout << "Devices:" << std::endl;
+    for (int i = 0; i < count; i++)
+    {
+      std::cout << i << ": " << getDeviceName(i) << std::endl;
+    }
+    std::cout << std::endl;
+  }
+}
+
+
+// Return the name field of the properties reported for the given device
+std::string getDeviceName(const int device)
+{
+  hipDeviceProp_t props;
+  hipGetDeviceProperties(&props, device);
+  check_error();
+  return std::string(props.name);
+}
+
+
+// Return the driver version as a string; calls hipSetDevice(device) first
+std::string getDeviceDriver(const int device)
+{
+  hipSetDevice(device);
+  check_error();
+  int driver;
+  hipDriverGetVersion(&driver);
+  check_error();
+  return std::to_string(driver);
+}
+
+// NOTE(review): element types restored as float and double - confirm against callers
+template class HIPStream<float>;
+template class HIPStream<double>;
diff --git a/HIPStream.h b/HIPStream.h
new file mode 100644
index 0000000..9015e35
--- /dev/null
+++ b/HIPStream.h
@@ -0,0 +1,44 @@
+
+// Copyright (c) 2015-16 Tom Deakin, Simon McIntosh-Smith,
+// University of Bristol HPC
+//
+// For full license terms please see the LICENSE file distributed with this
+// source code
+
+#pragma once
+
+#include <iostream>
+#include <stdexcept>
+#include <sstream>
+
+#include "Stream.h"
+
+#define IMPLEMENTATION_STRING "HIP"
+
+// Stream implementation whose copy/mul/add/triad run as kernels on a HIP device
+template <class T>
+class HIPStream : public Stream<T>
+{
+  protected:
+    // Size of arrays
+    unsigned int array_size;
+    // Device side pointers to arrays
+    T *d_a;
+    T *d_b;
+    T *d_c;
+
+
+  public:
+
+    HIPStream(const unsigned int, const int);
+    ~HIPStream();
+
+    virtual void copy() override;
+    virtual void add() override;
+    virtual void mul() override;
+    virtual void triad() override;
+
+    virtual void write_arrays(const std::vector<T>& a, const std::vector<T>& b, const std::vector<T>& c) override;
+    virtual void read_arrays(std::vector<T>& a, std::vector<T>& b, std::vector<T>& c) override;
+
+};
diff --git a/main.cpp b/main.cpp
index f1be420..fb689cf 100644
--- a/main.cpp
+++ b/main.cpp
@@ -20,6 +20,8 @@

 #if defined(CUDA)
 #include "CUDAStream.h"
+#elif defined(HIP)
+#include "HIPStream.h"
 #elif defined(OCL)
 #include "OCLStream.h"
 #elif defined(USE_RAJA)
@@ -103,6 +105,10 @@ void run()
   // Use the CUDA implementation
   stream = new
CUDAStream(ARRAY_SIZE, deviceIndex); +#elif defined(HIP) + // Use the HIP implementation + stream = new HIPStream(ARRAY_SIZE, deviceIndex); + #elif defined(OCL) // Use the OpenCL implementation stream = new OCLStream(ARRAY_SIZE, deviceIndex); diff --git a/results/v2.0/acc-pgi-kernel/broadwell.txt b/results/v2.0/broadwell/acc-pgi-kernel.txt similarity index 100% rename from results/v2.0/acc-pgi-kernel/broadwell.txt rename to results/v2.0/broadwell/acc-pgi-kernel.txt diff --git a/results/v2.0/acc-pgi-loops/broadwell.txt b/results/v2.0/broadwell/acc-pgi-loops.txt similarity index 100% rename from results/v2.0/acc-pgi-loops/broadwell.txt rename to results/v2.0/broadwell/acc-pgi-loops.txt diff --git a/results/v2.0/kokkos/broadwell.txt b/results/v2.0/broadwell/kokkos-gcc.txt similarity index 100% rename from results/v2.0/kokkos/broadwell.txt rename to results/v2.0/broadwell/kokkos-gcc.txt diff --git a/results/v2.0/original-cray/broadwell.txt b/results/v2.0/broadwell/mccalpin-cray.txt similarity index 100% rename from results/v2.0/original-cray/broadwell.txt rename to results/v2.0/broadwell/mccalpin-cray.txt diff --git a/results/v2.0/ocl-gnu/broadwell.txt b/results/v2.0/broadwell/ocl.txt similarity index 100% rename from results/v2.0/ocl-gnu/broadwell.txt rename to results/v2.0/broadwell/ocl.txt diff --git a/results/v2.0/omp3-cray/broadwell.txt b/results/v2.0/broadwell/omp-cray.txt similarity index 100% rename from results/v2.0/omp3-cray/broadwell.txt rename to results/v2.0/broadwell/omp-cray.txt diff --git a/results/v2.0/raja/broadwell.txt b/results/v2.0/broadwell/raja-gcc.txt similarity index 100% rename from results/v2.0/raja/broadwell.txt rename to results/v2.0/broadwell/raja-gcc.txt diff --git a/results/v2.0/furynano/hip.txt b/results/v2.0/furynano/hip.txt new file mode 100644 index 0000000..780ca26 --- /dev/null +++ b/results/v2.0/furynano/hip.txt @@ -0,0 +1,14 @@ +GPU-STREAM +Version: 2.0 +Implementation: HIP +Running kernels 100 times +Precision: double +Array 
size: 268.4 MB (=0.3 GB) +Total size: 805.3 MB (=0.8 GB) +Using HIP device Fiji +Driver: 4 +Function MBytes/sec Min (sec) Max Average +Copy 414098.238 0.00130 0.00132 0.00131 +Mul 416699.068 0.00129 0.00134 0.00131 +Add 422965.910 0.00190 0.00195 0.00192 +Triad 417453.151 0.00193 0.00196 0.00194 diff --git a/results/v2.0/ocl-gnu/fury.txt b/results/v2.0/furyx/ocl.txt similarity index 100% rename from results/v2.0/ocl-gnu/fury.txt rename to results/v2.0/furyx/ocl.txt diff --git a/results/v2.0/sycl/fury.txt b/results/v2.0/furyx/sycl.txt similarity index 100% rename from results/v2.0/sycl/fury.txt rename to results/v2.0/furyx/sycl.txt diff --git a/results/v2.0/acc-pgi-loops/980Ti.txt b/results/v2.0/gtx980ti/acc-pgi-loops.txt similarity index 100% rename from results/v2.0/acc-pgi-loops/980Ti.txt rename to results/v2.0/gtx980ti/acc-pgi-loops.txt diff --git a/results/v2.0/cuda-gnu/980ti.txt b/results/v2.0/gtx980ti/cuda.txt similarity index 100% rename from results/v2.0/cuda-gnu/980ti.txt rename to results/v2.0/gtx980ti/cuda.txt diff --git a/results/v2.0/kokkos/980ti.txt b/results/v2.0/gtx980ti/kokkos.txt similarity index 100% rename from results/v2.0/kokkos/980ti.txt rename to results/v2.0/gtx980ti/kokkos.txt diff --git a/results/v2.0/ocl-gnu/980ti.txt b/results/v2.0/gtx980ti/ocl.txt similarity index 100% rename from results/v2.0/ocl-gnu/980ti.txt rename to results/v2.0/gtx980ti/ocl.txt diff --git a/results/v2.0/raja/980ti.txt b/results/v2.0/gtx980ti/raja.txt similarity index 100% rename from results/v2.0/raja/980ti.txt rename to results/v2.0/gtx980ti/raja.txt diff --git a/results/v2.0/acc-pgi-kernel/haswell.txt b/results/v2.0/haswell/acc-pgi-kernel.txt similarity index 100% rename from results/v2.0/acc-pgi-kernel/haswell.txt rename to results/v2.0/haswell/acc-pgi-kernel.txt diff --git a/results/v2.0/acc-pgi-loops/haswell.txt b/results/v2.0/haswell/acc-pgi-loops.txt similarity index 100% rename from results/v2.0/acc-pgi-loops/haswell.txt rename to 
results/v2.0/haswell/acc-pgi-loops.txt diff --git a/results/v2.0/cuda-x86/Haswell.txt b/results/v2.0/haswell/cuda.txt similarity index 100% rename from results/v2.0/cuda-x86/Haswell.txt rename to results/v2.0/haswell/cuda.txt diff --git a/results/v2.0/kokkos/haswell.txt b/results/v2.0/haswell/kokkos-gcc.txt similarity index 100% rename from results/v2.0/kokkos/haswell.txt rename to results/v2.0/haswell/kokkos-gcc.txt diff --git a/results/v2.0/original-cray/haswell.txt b/results/v2.0/haswell/mccalpin-cray.txt similarity index 100% rename from results/v2.0/original-cray/haswell.txt rename to results/v2.0/haswell/mccalpin-cray.txt diff --git a/results/v2.0/ocl-gnu/haswell.txt b/results/v2.0/haswell/ocl.txt similarity index 100% rename from results/v2.0/ocl-gnu/haswell.txt rename to results/v2.0/haswell/ocl.txt diff --git a/results/v2.0/omp3-cray/haswell.txt b/results/v2.0/haswell/omp-cray.txt similarity index 100% rename from results/v2.0/omp3-cray/haswell.txt rename to results/v2.0/haswell/omp-cray.txt diff --git a/results/v2.0/raja/haswell.txt b/results/v2.0/haswell/raja-gcc.txt similarity index 100% rename from results/v2.0/raja/haswell.txt rename to results/v2.0/haswell/raja-gcc.txt diff --git a/results/v2.0/acc-pgi-kernel/ivybridge.txt b/results/v2.0/ivybridge/acc-pgi-kernel.txt similarity index 100% rename from results/v2.0/acc-pgi-kernel/ivybridge.txt rename to results/v2.0/ivybridge/acc-pgi-kernel.txt diff --git a/results/v2.0/acc-pgi-loops/IvyBridge.txt b/results/v2.0/ivybridge/acc-pgi-loops.txt similarity index 100% rename from results/v2.0/acc-pgi-loops/IvyBridge.txt rename to results/v2.0/ivybridge/acc-pgi-loops.txt diff --git a/results/v2.0/cuda-x86/IvyBridge.txt b/results/v2.0/ivybridge/cuda.txt similarity index 100% rename from results/v2.0/cuda-x86/IvyBridge.txt rename to results/v2.0/ivybridge/cuda.txt diff --git a/results/v2.0/kokkos/ivybridge.txt b/results/v2.0/ivybridge/kokkos-gcc.txt similarity index 100% rename from 
results/v2.0/kokkos/ivybridge.txt rename to results/v2.0/ivybridge/kokkos-gcc.txt diff --git a/results/v2.0/original-icc/ivybridge.txt b/results/v2.0/ivybridge/mccalpin-intel.txt similarity index 100% rename from results/v2.0/original-icc/ivybridge.txt rename to results/v2.0/ivybridge/mccalpin-intel.txt diff --git a/results/v2.0/ocl-gnu/IvyBridge.txt b/results/v2.0/ivybridge/ocl.txt similarity index 100% rename from results/v2.0/ocl-gnu/IvyBridge.txt rename to results/v2.0/ivybridge/ocl.txt diff --git a/results/v2.0/omp3-intel/IvyBridge.txt b/results/v2.0/ivybridge/omp-intel.txt similarity index 100% rename from results/v2.0/omp3-intel/IvyBridge.txt rename to results/v2.0/ivybridge/omp-intel.txt diff --git a/results/v2.0/raja/ivybridge.txt b/results/v2.0/ivybridge/raja-gcc.txt similarity index 100% rename from results/v2.0/raja/ivybridge.txt rename to results/v2.0/ivybridge/raja-gcc.txt diff --git a/results/v2.0/sycl/IvyBridge.txt b/results/v2.0/ivybridge/sycl.txt similarity index 100% rename from results/v2.0/sycl/IvyBridge.txt rename to results/v2.0/ivybridge/sycl.txt diff --git a/results/v2.0/acc-cray/K20X.txt b/results/v2.0/k20x/acc-cray.txt similarity index 100% rename from results/v2.0/acc-cray/K20X.txt rename to results/v2.0/k20x/acc-cray.txt diff --git a/results/v2.0/cuda-gnu/K20X.txt b/results/v2.0/k20x/cuda.txt similarity index 100% rename from results/v2.0/cuda-gnu/K20X.txt rename to results/v2.0/k20x/cuda.txt diff --git a/results/v2.0/kokkos/K20X.txt b/results/v2.0/k20x/kokkos.txt similarity index 100% rename from results/v2.0/kokkos/K20X.txt rename to results/v2.0/k20x/kokkos.txt diff --git a/results/v2.0/ocl-gnu/K20X.txt b/results/v2.0/k20x/ocl.txt similarity index 100% rename from results/v2.0/ocl-gnu/K20X.txt rename to results/v2.0/k20x/ocl.txt diff --git a/results/v2.0/omp45-cray/K20X.txt b/results/v2.0/k20x/omp-cray.txt similarity index 100% rename from results/v2.0/omp45-cray/K20X.txt rename to results/v2.0/k20x/omp-cray.txt diff --git 
a/results/v2.0/raja/K20X.txt b/results/v2.0/k20x/raja.txt similarity index 100% rename from results/v2.0/raja/K20X.txt rename to results/v2.0/k20x/raja.txt diff --git a/results/v2.0/acc-cray/K40.txt b/results/v2.0/k40/acc-cray.txt similarity index 100% rename from results/v2.0/acc-cray/K40.txt rename to results/v2.0/k40/acc-cray.txt diff --git a/results/v2.0/cuda-cray/K40.txt b/results/v2.0/k40/cuda.txt similarity index 100% rename from results/v2.0/cuda-cray/K40.txt rename to results/v2.0/k40/cuda.txt diff --git a/results/v2.0/kokkos/K40.txt b/results/v2.0/k40/kokkos.txt similarity index 100% rename from results/v2.0/kokkos/K40.txt rename to results/v2.0/k40/kokkos.txt diff --git a/results/v2.0/ocl-gnu/K40.txt b/results/v2.0/k40/ocl.txt similarity index 100% rename from results/v2.0/ocl-gnu/K40.txt rename to results/v2.0/k40/ocl.txt diff --git a/results/v2.0/raja/K40.txt b/results/v2.0/k40/raja.txt similarity index 100% rename from results/v2.0/raja/K40.txt rename to results/v2.0/k40/raja.txt diff --git a/results/v2.0/acc-cray/K80.txt b/results/v2.0/k80/acc-cray.txt similarity index 100% rename from results/v2.0/acc-cray/K80.txt rename to results/v2.0/k80/acc-cray.txt diff --git a/results/v2.0/cuda-cray/K80.txt b/results/v2.0/k80/cuda.txt similarity index 100% rename from results/v2.0/cuda-cray/K80.txt rename to results/v2.0/k80/cuda.txt diff --git a/results/v2.0/kokkos/K80.txt b/results/v2.0/k80/kokkos.txt similarity index 100% rename from results/v2.0/kokkos/K80.txt rename to results/v2.0/k80/kokkos.txt diff --git a/results/v2.0/ocl-gnu/K80.txt b/results/v2.0/k80/ocl.txt similarity index 100% rename from results/v2.0/ocl-gnu/K80.txt rename to results/v2.0/k80/ocl.txt diff --git a/results/v2.0/raja/K80.txt b/results/v2.0/k80/raja.txt similarity index 100% rename from results/v2.0/raja/K80.txt rename to results/v2.0/k80/raja.txt diff --git a/results/v2.0/knl/openacc-pgi.txt b/results/v2.0/knl/acc-pgi-kernel.txt similarity index 100% rename from 
results/v2.0/knl/openacc-pgi.txt rename to results/v2.0/knl/acc-pgi-kernel.txt diff --git a/results/v2.0/knl/kokkos-128.txt b/results/v2.0/knl/kokkos-intel-128threads.txt similarity index 100% rename from results/v2.0/knl/kokkos-128.txt rename to results/v2.0/knl/kokkos-intel-128threads.txt diff --git a/results/v2.0/knl/mccalpin.txt b/results/v2.0/knl/mccalpin-intel.txt similarity index 100% rename from results/v2.0/knl/mccalpin.txt rename to results/v2.0/knl/mccalpin-intel.txt diff --git a/results/v2.0/ocl-gnu/knl.txt b/results/v2.0/knl/ocl.txt similarity index 100% rename from results/v2.0/ocl-gnu/knl.txt rename to results/v2.0/knl/ocl.txt diff --git a/results/v2.0/knl/omp3.txt b/results/v2.0/knl/omp-intel.txt similarity index 100% rename from results/v2.0/knl/omp3.txt rename to results/v2.0/knl/omp-intel.txt diff --git a/results/v2.0/knl/raja.txt b/results/v2.0/knl/raja-intel.txt similarity index 100% rename from results/v2.0/knl/raja.txt rename to results/v2.0/knl/raja-intel.txt diff --git a/results/v2.0/sycl/knl.txt b/results/v2.0/knl/sycl.txt similarity index 100% rename from results/v2.0/sycl/knl.txt rename to results/v2.0/knl/sycl.txt diff --git a/results/v2.0/omp40-cray/K40.txt b/results/v2.0/omp40-cray/K40.txt deleted file mode 100644 index 31f1b33..0000000 --- a/results/v2.0/omp40-cray/K40.txt +++ /dev/null @@ -1,12 +0,0 @@ -GPU-STREAM -Version: 2.0 -Implementation: OpenMP 4.0 -Running kernels 100 times -Precision: double -Array size: 268.4 MB (=0.3 GB) -Total size: 805.3 MB (=0.8 GB) -Function MBytes/sec Min (sec) Max Average -Copy 183090.545 0.00293 0.00295 0.00294 -Mul 182133.500 0.00295 0.00298 0.00295 -Add 180897.478 0.00445 0.00447 0.00446 -Triad 180637.056 0.00446 0.00447 0.00446 diff --git a/results/v2.0/omp40-cray/K80.txt b/results/v2.0/omp40-cray/K80.txt deleted file mode 100644 index c69f76e..0000000 --- a/results/v2.0/omp40-cray/K80.txt +++ /dev/null @@ -1,12 +0,0 @@ -GPU-STREAM -Version: 2.0 -Implementation: OpenMP 4.0 -Running kernels 100 
times -Precision: double -Array size: 268.4 MB (=0.3 GB) -Total size: 805.3 MB (=0.8 GB) -Function MBytes/sec Min (sec) Max Average -Copy 169214.022 0.00317 0.00437 0.00323 -Mul 168803.444 0.00318 0.00435 0.00323 -Add 167171.006 0.00482 0.00571 0.00486 -Triad 166943.598 0.00482 0.00710 0.00489 diff --git a/results/v2.0/omp45-clang/980ti.txt b/results/v2.0/omp45-clang/980ti.txt deleted file mode 100644 index 621a33d..0000000 --- a/results/v2.0/omp45-clang/980ti.txt +++ /dev/null @@ -1,12 +0,0 @@ -GPU-STREAM -Version: 2.0 -Implementation: OpenMP 4.5 -Running kernels 100 times -Precision: double -Array size: 268.4 MB (=0.3 GB) -Total size: 805.3 MB (=0.8 GB) -Function MBytes/sec Min (sec) Max Average -Copy 232637.036 0.00231 0.00715 0.00238 -Mul 227777.705 0.00236 0.00259 0.00247 -Add 246187.342 0.00327 0.00343 0.00332 -Triad 239670.377 0.00336 0.00362 0.00347 diff --git a/results/v2.0/xl-power8/kokkos.txt b/results/v2.0/power8/kokkos-xl.txt similarity index 100% rename from results/v2.0/xl-power8/kokkos.txt rename to results/v2.0/power8/kokkos-xl.txt diff --git a/results/v2.0/xl-power8/mccalpin.txt b/results/v2.0/power8/mccalpin-xl.txt similarity index 100% rename from results/v2.0/xl-power8/mccalpin.txt rename to results/v2.0/power8/mccalpin-xl.txt diff --git a/results/v2.0/xl-power8/omp3.txt b/results/v2.0/power8/omp-xl.txt similarity index 100% rename from results/v2.0/xl-power8/omp3.txt rename to results/v2.0/power8/omp-xl.txt diff --git a/results/v2.0/gcc-power8/raja.txt b/results/v2.0/power8/raja-gcc.txt similarity index 100% rename from results/v2.0/gcc-power8/raja.txt rename to results/v2.0/power8/raja-gcc.txt diff --git a/results/v2.0/xl-power8/raja.txt b/results/v2.0/power8/raja-xl.txt similarity index 100% rename from results/v2.0/xl-power8/raja.txt rename to results/v2.0/power8/raja-xl.txt diff --git a/results/v2.0/acc-pgi-loops/S9150.txt b/results/v2.0/s9150/acc-pgi-loops.txt similarity index 100% rename from results/v2.0/acc-pgi-loops/S9150.txt rename 
to results/v2.0/s9150/acc-pgi-loops.txt diff --git a/results/v2.0/ocl-gnu/S9150.txt b/results/v2.0/s9150/ocl.txt similarity index 100% rename from results/v2.0/ocl-gnu/S9150.txt rename to results/v2.0/s9150/ocl.txt diff --git a/results/v2.0/sycl/S9150.txt b/results/v2.0/s9150/sycl.txt similarity index 100% rename from results/v2.0/sycl/S9150.txt rename to results/v2.0/s9150/sycl.txt diff --git a/results/v2.0/s9300x2/hip.txt b/results/v2.0/s9300x2/hip.txt new file mode 100644 index 0000000..d9f78aa --- /dev/null +++ b/results/v2.0/s9300x2/hip.txt @@ -0,0 +1,14 @@ +GPU-STREAM +Version: 2.0 +Implementation: HIP +Running kernels 100 times +Precision: double +Array size: 268.4 MB (=0.3 GB) +Total size: 805.3 MB (=0.8 GB) +Using HIP device Fiji +Driver: 4 +Function MBytes/sec Min (sec) Max Average +Copy 442194.067 0.00121 0.03320 0.00331 +Mul 442500.583 0.00121 0.00195 0.00149 +Add 459234.293 0.00175 0.00355 0.00248 +Triad 458682.906 0.00176 0.00357 0.00246 diff --git a/results/v2.0/acc-pgi-kernel/sandybridge.txt b/results/v2.0/sandybridge/acc-pgi-kernel.txt similarity index 100% rename from results/v2.0/acc-pgi-kernel/sandybridge.txt rename to results/v2.0/sandybridge/acc-pgi-kernel.txt diff --git a/results/v2.0/acc-pgi-loops/SandyBridge.txt b/results/v2.0/sandybridge/acc-pgi-loops.txt similarity index 100% rename from results/v2.0/acc-pgi-loops/SandyBridge.txt rename to results/v2.0/sandybridge/acc-pgi-loops.txt diff --git a/results/v2.0/cuda-x86/SandyBridge b/results/v2.0/sandybridge/cuda.txt similarity index 100% rename from results/v2.0/cuda-x86/SandyBridge rename to results/v2.0/sandybridge/cuda.txt diff --git a/results/v2.0/kokkos/sandybridge.txt b/results/v2.0/sandybridge/kokkos-gcc.txt similarity index 100% rename from results/v2.0/kokkos/sandybridge.txt rename to results/v2.0/sandybridge/kokkos-gcc.txt diff --git a/results/v2.0/original-icc/sandybridge.txt b/results/v2.0/sandybridge/mccalpin-intel.txt similarity index 100% rename from 
results/v2.0/original-icc/sandybridge.txt rename to results/v2.0/sandybridge/mccalpin-intel.txt diff --git a/results/v2.0/ocl-gnu/SandyBridge.txt b/results/v2.0/sandybridge/ocl.txt similarity index 100% rename from results/v2.0/ocl-gnu/SandyBridge.txt rename to results/v2.0/sandybridge/ocl.txt diff --git a/results/v2.0/omp3-intel/SandyBridge.txt b/results/v2.0/sandybridge/omp-intel.txt similarity index 100% rename from results/v2.0/omp3-intel/SandyBridge.txt rename to results/v2.0/sandybridge/omp-intel.txt diff --git a/results/v2.0/raja/sandybridge.txt b/results/v2.0/sandybridge/raja-gcc.txt similarity index 100% rename from results/v2.0/raja/sandybridge.txt rename to results/v2.0/sandybridge/raja-gcc.txt diff --git a/results/v2.0/titanx/hip.txt b/results/v2.0/titanx/hip.txt new file mode 100644 index 0000000..6104be2 --- /dev/null +++ b/results/v2.0/titanx/hip.txt @@ -0,0 +1,14 @@ +GPU-STREAM +Version: 2.0 +Implementation: HIP +Running kernels 100 times +Precision: double +Array size: 268.4 MB (=0.3 GB) +Total size: 805.3 MB (=0.8 GB) +Using HIP device GeForce GTX TITAN X +Driver: 4 +Function MBytes/sec Min (sec) Max Average +Copy 263048.615 0.00204 0.00205 0.00205 +Mul 262831.366 0.00204 0.00205 0.00205 +Add 268754.019 0.00300 0.00301 0.00300 +Triad 268630.840 0.00300 0.00301 0.00300