Merge branch 'master' of github.com:UoB-HPC/GPU-STREAM

This commit is contained in:
Tom Deakin 2015-09-21 15:39:20 +01:00
commit cecddb146b
3 changed files with 47 additions and 11 deletions

View File

@ -13,6 +13,41 @@ Build the OpenCL and CUDA binaries with `make`
Run the OpenCL version with `./gpu-stream-ocl` and the CUDA version with `./gpu-stream-cuda` Run the OpenCL version with `./gpu-stream-ocl` and the CUDA version with `./gpu-stream-cuda`
Android
-------
Assuming you have a recent Android NDK available, you can use the
toolchain that it provides to build GPU-STREAM. You should first
use the NDK to generate a standalone toolchain:
# Select a directory to install the toolchain to
ANDROID_NATIVE_TOOLCHAIN=/path/to/toolchain
${NDK}/build/tools/make-standalone-toolchain.sh \
--platform=android-14 \
--toolchain=arm-linux-androideabi-4.8 \
--install-dir=${ANDROID_NATIVE_TOOLCHAIN}
Make sure that the OpenCL headers and library (libOpenCL.so) are
available in `${ANDROID_NATIVE_TOOLCHAIN}/sysroot/usr/`.
You should then be able to build GPU-STREAM:
make CXX=${ANDROID_NATIVE_TOOLCHAIN}/bin/arm-linux-androideabi-g++
Copy the executable and OpenCL kernels to the device:
adb push gpu-stream-ocl /data/local/tmp
adb push ocl-stream-kernels.cl /data/local/tmp
Run GPU-STREAM from an adb shell:
adb shell
cd /data/local/tmp
# Use float if device doesn't support double, and reduce array size
./gpu-stream-ocl --float -n 6 -s 10000000
Results Results
------- -------

View File

@ -38,18 +38,18 @@
// Default array size 50 * 2^20 (50*8 Mebibytes double precision) // Default array size 50 * 2^20 (50*8 Mebibytes double precision)
// Use binary powers of two so the size is divisible by 1024 // Use binary powers of two so the size is divisible by 1024
int ARRAY_SIZE = 52428800; unsigned int ARRAY_SIZE = 52428800;
int NTIMES = 10; unsigned int NTIMES = 10;
bool useFloat = false; bool useFloat = false;
int deviceIndex = 0; unsigned int deviceIndex = 0;
int parseInt(const char *str, int *output) int parseUInt(const char *str, unsigned int *output)
{ {
char *next; char *next;
*output = strtol(str, &next, 10); *output = strtoul(str, &next, 10);
return !strlen(next); return !strlen(next);
} }
@ -64,7 +64,7 @@ void parseArguments(int argc, char *argv[])
} }
else if (!strcmp(argv[i], "--device")) else if (!strcmp(argv[i], "--device"))
{ {
if (++i >= argc || !parseInt(argv[i], &deviceIndex)) if (++i >= argc || !parseUInt(argv[i], &deviceIndex))
{ {
std::cout << "Invalid device index" << std::endl; std::cout << "Invalid device index" << std::endl;
exit(1); exit(1);
@ -72,7 +72,7 @@ void parseArguments(int argc, char *argv[])
} }
else if (!strcmp(argv[i], "--arraysize") || !strcmp(argv[i], "-s")) else if (!strcmp(argv[i], "--arraysize") || !strcmp(argv[i], "-s"))
{ {
if (++i >= argc || !parseInt(argv[i], &ARRAY_SIZE)) if (++i >= argc || !parseUInt(argv[i], &ARRAY_SIZE))
{ {
std::cout << "Invalid array size" << std::endl; std::cout << "Invalid array size" << std::endl;
exit(1); exit(1);
@ -80,7 +80,7 @@ void parseArguments(int argc, char *argv[])
} }
else if (!strcmp(argv[i], "--numtimes") || !strcmp(argv[i], "-n")) else if (!strcmp(argv[i], "--numtimes") || !strcmp(argv[i], "-n"))
{ {
if (++i >= argc || !parseInt(argv[i], &NTIMES)) if (++i >= argc || !parseUInt(argv[i], &NTIMES))
{ {
std::cout << "Invalid number of times" << std::endl; std::cout << "Invalid number of times" << std::endl;
exit(1); exit(1);

View File

@ -36,6 +36,7 @@
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include <cstdlib>
#include <cstring> #include <cstring>
#include <limits> #include <limits>
#include <stdexcept> #include <stdexcept>
@ -46,12 +47,12 @@ extern void parseArguments(int argc, char *argv[]);
extern void listDevices(void); extern void listDevices(void);
extern int ARRAY_SIZE; extern unsigned int ARRAY_SIZE;
extern int NTIMES; extern unsigned int NTIMES;
extern bool useFloat; extern bool useFloat;
extern int deviceIndex; extern unsigned int deviceIndex;
template < typename T > template < typename T >