diff --git a/README.md b/README.md index 15e33af..056ffcf 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,41 @@ Build the OpenCL and CUDA binaries with `make` Run the OpenCL version with `./gpu-stream-ocl` and the CUDA version with `./gpu-stream-cuda` +Android +------- + +Assuming you have a recent Android NDK available, you can use the +toolchain that it provides to build GPU-STREAM. You should first +use the NDK to generate a standalone toolchain: + + # Select a directory to install the toolchain to + ANDROID_NATIVE_TOOLCHAIN=/path/to/toolchain + + ${NDK}/build/tools/make-standalone-toolchain.sh \ + --platform=android-14 \ + --toolchain=arm-linux-androideabi-4.8 \ + --install-dir=${ANDROID_NATIVE_TOOLCHAIN} + +Make sure that the OpenCL headers and library (libOpenCL.so) are +available in `${ANDROID_NATIVE_TOOLCHAIN}/sysroot/usr/`. + +You should then be able to build GPU-STREAM: + + make CXX=${ANDROID_NATIVE_TOOLCHAIN}/bin/arm-linux-androideabi-g++ + +Copy the executable and OpenCL kernels to the device: + + adb push gpu-stream-ocl /data/local/tmp + adb push ocl-stream-kernels.cl /data/local/tmp + +Run GPU-STREAM from an adb shell: + + adb shell + cd /data/local/tmp + + # Use float if device doesn't support double, and reduce array size + ./gpu-stream-ocl --float -n 6 -s 10000000 + Results ------- diff --git a/common.cpp b/common.cpp index d266685..781d70e 100644 --- a/common.cpp +++ b/common.cpp @@ -38,18 +38,18 @@ // Default array size 50 * 2^20 (50*8 Mebibytes double precision) // Use binary powers of two so divides 1024 -int ARRAY_SIZE = 52428800; +unsigned int ARRAY_SIZE = 52428800; -int NTIMES = 10; +unsigned int NTIMES = 10; bool useFloat = false; -int deviceIndex = 0; +unsigned int deviceIndex = 0; -int parseInt(const char *str, int *output) +int parseUInt(const char *str, unsigned int *output) { char *next; - *output = strtol(str, &next, 10); + *output = strtoul(str, &next, 10); return !strlen(next); } @@ -64,7 +64,7 @@ void parseArguments(int argc, char *argv[]) } else if (!strcmp(argv[i], "--device")) { - if (++i >= argc || !parseInt(argv[i], &deviceIndex)) + if (++i >= argc || !parseUInt(argv[i], &deviceIndex)) { std::cout << "Invalid device index" << std::endl; exit(1); @@ -72,7 +72,7 @@ void parseArguments(int argc, char *argv[]) } else if (!strcmp(argv[i], "--arraysize") || !strcmp(argv[i], "-s")) { - if (++i >= argc || !parseInt(argv[i], &ARRAY_SIZE)) + if (++i >= argc || !parseUInt(argv[i], &ARRAY_SIZE)) { std::cout << "Invalid array size" << std::endl; exit(1); @@ -80,7 +80,7 @@ void parseArguments(int argc, char *argv[]) } else if (!strcmp(argv[i], "--numtimes") || !strcmp(argv[i], "-n")) { - if (++i >= argc || !parseInt(argv[i], &NTIMES)) + if (++i >= argc || !parseUInt(argv[i], &NTIMES)) { std::cout << "Invalid number of times" << std::endl; exit(1); diff --git a/common.h b/common.h index 3ef4b75..a4dd886 100644 --- a/common.h +++ b/common.h @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -46,12 +47,12 @@ extern void parseArguments(int argc, char *argv[]); extern void listDevices(void); -extern int ARRAY_SIZE; -extern int NTIMES; +extern unsigned int ARRAY_SIZE; +extern unsigned int NTIMES; extern bool useFloat; -extern int deviceIndex; +extern unsigned int deviceIndex; template < typename T >