diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 2427ed1..b44197f 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -3,6 +3,20 @@ on: [push, pull_request] jobs: + + test-java: + runs-on: ubuntu-18.04 + defaults: + run: + working-directory: ./java-stream + steps: + - uses: actions/checkout@v2 + - name: Test build project + run: ./mvnw clean package + - name: Test run + if: ${{ ! cancelled() }} + run: java -jar target/java-stream.jar --arraysize 2048 + test-julia: runs-on: ubuntu-18.04 defaults: @@ -31,6 +45,7 @@ jobs: if: ${{ ! cancelled() }} run: julia --project src/AMDGPUStream.jl --list + test: runs-on: ubuntu-18.04 steps: diff --git a/java-stream/.gitignore b/java-stream/.gitignore new file mode 100644 index 0000000..2ed994a --- /dev/null +++ b/java-stream/.gitignore @@ -0,0 +1,128 @@ +## File-based project format: +.idea +*.iws +*.iml + +## Plugin-specific files: + +# IntelliJ +/out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties +### VisualStudioCode template +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +### Linux template +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +# Windows thumbnail cache files +Thumbs.db +ehthumbs.db +ehthumbs_vista.db + +# Folder config file +Desktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msm +*.msp + +# Windows shortcuts +*.lnk +### Maven template +target/ +pom.xml.tag +pom.xml.releaseBackup +pom.xml.versionsBackup +pom.xml.next +release.properties +dependency-reduced-pom.xml +buildNumber.properties +.mvn/timing.properties + +# Avoid ignoring Maven wrapper jar file (.jar files are usually ignored) +!/.mvn/wrapper/maven-wrapper.jar +### Java template +# Compiled class file +*.class + +# Log file +*.log + +# BlueJ files +*.ctxt + +# Mobile Tools for Java (J2ME) +.mtj.tmp/ + +# Package Files # +*.jar +*.war +*.ear +*.zip +*.tar.gz +*.rar + +# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml +hs_err_pid* +### macOS template +*.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + + +!.mvn/**/* + +settings.xml diff --git a/java-stream/.mvn/wrapper/maven-wrapper.jar b/java-stream/.mvn/wrapper/maven-wrapper.jar new file mode 100644 index 0000000..9cc84ea Binary files /dev/null and b/java-stream/.mvn/wrapper/maven-wrapper.jar differ diff --git a/java-stream/.mvn/wrapper/maven-wrapper.properties b/java-stream/.mvn/wrapper/maven-wrapper.properties new file mode 100644 index 0000000..56bb016 --- /dev/null +++ b/java-stream/.mvn/wrapper/maven-wrapper.properties @@ -0,0 +1 @@ +distributionUrl=https://repo1.maven.org/maven2/org/apache/maven/apache-maven/3.5.0/apache-maven-3.5.0-bin.zip \ No newline at end of file diff --git a/java-stream/README.md b/java-stream/README.md new file mode 100644 index 0000000..6c233da --- /dev/null +++ b/java-stream/README.md @@ -0,0 +1,172 @@ +java-stream +=========== + +This is an implementation of BabelStream in Java 8 which contains the following implementations: + +* `jdk-plain` - Single threaded `for` +* `jdk-stream` - Threaded implementation using JDK8's parallel stream API +* `tornadovm` - A [TornadoVM](https://github.com/beehive-lab/TornadoVM) implementation for + PTX/OpenCL +* `aparapi` - A [Aparapi](https://git.qoto.org/aparapi/aparapi) implementation for OpenCL + +### Build & Run + +Prerequisites + +* JDK >= 8 + +To run the benchmark, first create a binary: + +```shell +> cd java-stream +> ./mvnw clean package +``` + +The binary will be located at `./target/java-stream.jar`. Run it with: + +```shell +> java -version  ✔  11.0.11+9 ☕  tom@soraws-uk  05:03:20 +openjdk version "11.0.11" 2021-04-20 +OpenJDK Runtime Environment GraalVM CE 21.1.0 (build 11.0.11+8-jvmci-21.1-b05) +OpenJDK 64-Bit Server VM GraalVM CE 21.1.0 (build 11.0.11+8-jvmci-21.1-b05, mixed mode) +> java -jar target/java-stream.jar --help +``` + +For best results, benchmark with the following JVM flags: + +``` +-XX:-UseOnStackReplacement # disable OSR, not useful for this benchmark as we are measuring peak performance +-XX:-TieredCompilation # disable C1, go straight to C2 +-XX:ReservedCodeCacheSize=512m # don't flush compiled code out of cache at any point +``` + +Worked example: + +```shell +> java -XX:-UseOnStackReplacement -XX:-TieredCompilation -XX:ReservedCodeCacheSize=512m -jar target/java-stream.jar +BabelStream +Version: 3.4 +Implementation: jdk-stream; (Java 11.0.11;Red Hat, Inc.; home=/usr/lib/jvm/java-11-openjdk-11.0.11.0.9-4.fc33.x86_64) +Running all 100 times +Precision: double +Array size: 268.4 MB (=0.3 GB) +Total size: 805.3 MB (=0.8 GB) +Function MBytes/sec Min (sec) Max Average +Copy 17145.538 0.03131 0.04779 0.03413 +Mul 16759.092 0.03203 0.04752 0.03579 +Add 19431.954 0.04144 0.05866 0.04503 +Triad 19763.970 0.04075 0.05388 0.04510 +Dot 26646.894 0.02015 0.03013 0.02259 +``` + +If your OpenCL/CUDA installation is not at the default location, TornadoVM and Aparapi may fail to +detect your devices. In those cases, you may specify the library directly, for example: + +```shell +> LD_PRELOAD=/opt/rocm-4.0.0/opencl/lib/libOpenCL.so.1.2 java -jar target/java-stream.jar ... +``` + +### Instructions for TornadoVM + +The TornadoVM implementation requires you to run the binary with a patched JVM. Follow the +official [instructions](https://github.com/beehive-lab/TornadoVM/blob/master/assembly/src/docs/10_INSTALL_WITH_GRAALVM.md) +or use the following simplified instructions: + +Prerequisites + +* CMake >= 3.6 +* GCC or clang/LLVM (GCC >= 5.5) +* Python >= 2.7 +* Maven >= 3.6.3 +* OpenCL headers >= 1.2 and/or CUDA SDK >= 9.0 + +First, get a copy of the TornadoVM source: + +```shell +> cd +> git clone https://github.com/beehive-lab/TornadoVM tornadovm +``` + +Take note of the required GraalVM version +in `tornadovm/assembly/src/docs/10_INSTALL_WITH_GRAALVM.md`. We'll use `21.1.0` in this example. +Now, obtain a copy of GraalVM and make sure the version matches the one required by TornadoVM: + +```shell +> wget https://github.com/graalvm/graalvm-ce-builds/releases/download/vm-21.1.0/graalvm-ce-java11-linux-amd64-21.1.0.tar.gz +> tar -xf graalvm-ce-java11-linux-amd64-21.1.0.tar.gz +``` + +Next, create `~/tornadovm/etc/sources.env` and populate the file with the following: + +```shell +#!/bin/bash +export JAVA_HOME= +export PATH=$PWD/bin/bin:$PATH +export TORNADO_SDK=$PWD/bin/sdk +export CMAKE_ROOT=/usr # path to CMake binary +``` + +Proceed to compile TornadoVM: + +```shell +> cd ~/tornadovm +> . etc/sources.env +> make graal-jdk-11-plus BACKEND={ptx,opencl} +``` + +To test your build, source the environment file: + +```shell +> source ~/tornadovm/etc/sources.env +> LD_PRELOAD=/opt/rocm-4.0.0/opencl/lib/libOpenCL.so.1.2 tornado --devices +Number of Tornado drivers: 1 +Total number of OpenCL devices : 3 +Tornado device=0:0 + AMD Accelerated Parallel Processing -- gfx1012 + Global Memory Size: 4.0 GB + Local Memory Size: 64.0 KB + Workgroup Dimensions: 3 + Max WorkGroup Configuration: [1024, 1024, 1024] + Device OpenCL C version: OpenCL C 2.0 + +Tornado device=0:1 + Portable Computing Language -- pthread-AMD Ryzen 9 3900X 12-Core Processor + Global Memory Size: 60.7 GB + Local Memory Size: 8.0 MB + Workgroup Dimensions: 3 + Max WorkGroup Configuration: [4096, 4096, 4096] + Device OpenCL C version: OpenCL C 1.2 pocl + +Tornado device=0:2 + NVIDIA CUDA -- NVIDIA GeForce GT 710 + Global Memory Size: 981.3 MB + Local Memory Size: 48.0 KB + Workgroup Dimensions: 3 + Max WorkGroup Configuration: [1024, 1024, 64] + Device OpenCL C version: OpenCL C 1.2 +``` + +You can now use TornadoVM to run java-stream: + +```shell +> tornado -jar ~/java-stream/target/java-stream.jar --impl tornadovm --arraysize 65536  1 ✘  11.0.11+9 ☕  tom@soraws-uk  05:31:34 +BabelStream +Version: 3.4 +Implementation: tornadovm; (Java 11.0.11;GraalVM Community; home=~/graalvm-ce-java11-21.1.0) +Running all 100 times +Precision: double +Array size: 0.5 MB (=0.0 GB) +Total size: 1.6 MB (=0.0 GB) +Using TornadoVM device: + - Name : NVIDIA GeForce GT 710 CL_DEVICE_TYPE_GPU (available) + - Id : opencl-0-0 + - Platform : NVIDIA CUDA + - Backend : OpenCL +Function MBytes/sec Min (sec) Max Average +Copy 8791.100 0.00012 0.00079 0.00015 +Mul 8774.107 0.00012 0.00061 0.00014 +Add 9903.313 0.00016 0.00030 0.00018 +Triad 9861.031 0.00016 0.00030 0.00018 +Dot 2799.465 0.00037 0.00056 0.00041 +``` + diff --git a/java-stream/mvnw b/java-stream/mvnw new file mode 100755 index 0000000..5bf251c --- /dev/null +++ b/java-stream/mvnw @@ -0,0 +1,225 @@ +#!/bin/sh +# ---------------------------------------------------------------------------- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ---------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------- +# Maven2 Start Up Batch script +# +# Required ENV vars: +# ------------------ +# JAVA_HOME - location of a JDK home dir +# +# Optional ENV vars +# ----------------- +# M2_HOME - location of maven2's installed home dir +# MAVEN_OPTS - parameters passed to the Java VM when running Maven +# e.g. to debug Maven itself, use +# set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 +# MAVEN_SKIP_RC - flag to disable loading of mavenrc files +# ---------------------------------------------------------------------------- + +if [ -z "$MAVEN_SKIP_RC" ] ; then + + if [ -f /etc/mavenrc ] ; then + . /etc/mavenrc + fi + + if [ -f "$HOME/.mavenrc" ] ; then + . "$HOME/.mavenrc" + fi + +fi + +# OS specific support. $var _must_ be set to either true or false. +cygwin=false; +darwin=false; +mingw=false +case "`uname`" in + CYGWIN*) cygwin=true ;; + MINGW*) mingw=true;; + Darwin*) darwin=true + # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home + # See https://developer.apple.com/library/mac/qa/qa1170/_index.html + if [ -z "$JAVA_HOME" ]; then + if [ -x "/usr/libexec/java_home" ]; then + export JAVA_HOME="`/usr/libexec/java_home`" + else + export JAVA_HOME="/Library/Java/Home" + fi + fi + ;; +esac + +if [ -z "$JAVA_HOME" ] ; then + if [ -r /etc/gentoo-release ] ; then + JAVA_HOME=`java-config --jre-home` + fi +fi + +if [ -z "$M2_HOME" ] ; then + ## resolve links - $0 may be a link to maven's home + PRG="$0" + + # need this for relative symlinks + while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG="`dirname "$PRG"`/$link" + fi + done + + saveddir=`pwd` + + M2_HOME=`dirname "$PRG"`/.. + + # make it fully qualified + M2_HOME=`cd "$M2_HOME" && pwd` + + cd "$saveddir" + # echo Using m2 at $M2_HOME +fi + +# For Cygwin, ensure paths are in UNIX format before anything is touched +if $cygwin ; then + [ -n "$M2_HOME" ] && + M2_HOME=`cygpath --unix "$M2_HOME"` + [ -n "$JAVA_HOME" ] && + JAVA_HOME=`cygpath --unix "$JAVA_HOME"` + [ -n "$CLASSPATH" ] && + CLASSPATH=`cygpath --path --unix "$CLASSPATH"` +fi + +# For Migwn, ensure paths are in UNIX format before anything is touched +if $mingw ; then + [ -n "$M2_HOME" ] && + M2_HOME="`(cd "$M2_HOME"; pwd)`" + [ -n "$JAVA_HOME" ] && + JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`" + # TODO classpath? +fi + +if [ -z "$JAVA_HOME" ]; then + javaExecutable="`which javac`" + if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then + # readlink(1) is not available as standard on Solaris 10. + readLink=`which readlink` + if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then + if $darwin ; then + javaHome="`dirname \"$javaExecutable\"`" + javaExecutable="`cd \"$javaHome\" && pwd -P`/javac" + else + javaExecutable="`readlink -f \"$javaExecutable\"`" + fi + javaHome="`dirname \"$javaExecutable\"`" + javaHome=`expr "$javaHome" : '\(.*\)/bin'` + JAVA_HOME="$javaHome" + export JAVA_HOME + fi + fi +fi + +if [ -z "$JAVACMD" ] ; then + if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + else + JAVACMD="`which java`" + fi +fi + +if [ ! -x "$JAVACMD" ] ; then + echo "Error: JAVA_HOME is not defined correctly." >&2 + echo " We cannot execute $JAVACMD" >&2 + exit 1 +fi + +if [ -z "$JAVA_HOME" ] ; then + echo "Warning: JAVA_HOME environment variable is not set." +fi + +CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher + +# traverses directory structure from process work directory to filesystem root +# first directory with .mvn subdirectory is considered project base directory +find_maven_basedir() { + + if [ -z "$1" ] + then + echo "Path not specified to find_maven_basedir" + return 1 + fi + + basedir="$1" + wdir="$1" + while [ "$wdir" != '/' ] ; do + if [ -d "$wdir"/.mvn ] ; then + basedir=$wdir + break + fi + # workaround for JBEAP-8937 (on Solaris 10/Sparc) + if [ -d "${wdir}" ]; then + wdir=`cd "$wdir/.."; pwd` + fi + # end of workaround + done + echo "${basedir}" +} + +# concatenates all lines of a file +concat_lines() { + if [ -f "$1" ]; then + echo "$(tr -s '\n' ' ' < "$1")" + fi +} + +BASE_DIR=`find_maven_basedir "$(pwd)"` +if [ -z "$BASE_DIR" ]; then + exit 1; +fi + +export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"} +echo $MAVEN_PROJECTBASEDIR +MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS" + +# For Cygwin, switch paths to Windows format before running java +if $cygwin; then + [ -n "$M2_HOME" ] && + M2_HOME=`cygpath --path --windows "$M2_HOME"` + [ -n "$JAVA_HOME" ] && + JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"` + [ -n "$CLASSPATH" ] && + CLASSPATH=`cygpath --path --windows "$CLASSPATH"` + [ -n "$MAVEN_PROJECTBASEDIR" ] && + MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"` +fi + +WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain + +exec "$JAVACMD" \ + $MAVEN_OPTS \ + -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \ + "-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \ + ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@" diff --git a/java-stream/mvnw.cmd b/java-stream/mvnw.cmd new file mode 100644 index 0000000..019bd74 --- /dev/null +++ b/java-stream/mvnw.cmd @@ -0,0 +1,143 @@ +@REM ---------------------------------------------------------------------------- +@REM Licensed to the Apache Software Foundation (ASF) under one +@REM or more contributor license agreements. See the NOTICE file +@REM distributed with this work for additional information +@REM regarding copyright ownership. The ASF licenses this file +@REM to you under the Apache License, Version 2.0 (the +@REM "License"); you may not use this file except in compliance +@REM with the License. You may obtain a copy of the License at +@REM +@REM http://www.apache.org/licenses/LICENSE-2.0 +@REM +@REM Unless required by applicable law or agreed to in writing, +@REM software distributed under the License is distributed on an +@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@REM KIND, either express or implied. See the License for the +@REM specific language governing permissions and limitations +@REM under the License. +@REM ---------------------------------------------------------------------------- + +@REM ---------------------------------------------------------------------------- +@REM Maven2 Start Up Batch script +@REM +@REM Required ENV vars: +@REM JAVA_HOME - location of a JDK home dir +@REM +@REM Optional ENV vars +@REM M2_HOME - location of maven2's installed home dir +@REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands +@REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a key stroke before ending +@REM MAVEN_OPTS - parameters passed to the Java VM when running Maven +@REM e.g. to debug Maven itself, use +@REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 +@REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files +@REM ---------------------------------------------------------------------------- + +@REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on' +@echo off +@REM enable echoing my setting MAVEN_BATCH_ECHO to 'on' +@if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO% + +@REM set %HOME% to equivalent of $HOME +if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%") + +@REM Execute a user defined script before this one +if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre +@REM check for pre script, once with legacy .bat ending and once with .cmd ending +if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat" +if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd" +:skipRcPre + +@setlocal + +set ERROR_CODE=0 + +@REM To isolate internal variables from possible post scripts, we use another setlocal +@setlocal + +@REM ==== START VALIDATION ==== +if not "%JAVA_HOME%" == "" goto OkJHome + +echo. +echo Error: JAVA_HOME not found in your environment. >&2 +echo Please set the JAVA_HOME variable in your environment to match the >&2 +echo location of your Java installation. >&2 +echo. +goto error + +:OkJHome +if exist "%JAVA_HOME%\bin\java.exe" goto init + +echo. +echo Error: JAVA_HOME is set to an invalid directory. >&2 +echo JAVA_HOME = "%JAVA_HOME%" >&2 +echo Please set the JAVA_HOME variable in your environment to match the >&2 +echo location of your Java installation. >&2 +echo. +goto error + +@REM ==== END VALIDATION ==== + +:init + +@REM Find the project base dir, i.e. the directory that contains the folder ".mvn". +@REM Fallback to current working directory if not found. + +set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR% +IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir + +set EXEC_DIR=%CD% +set WDIR=%EXEC_DIR% +:findBaseDir +IF EXIST "%WDIR%"\.mvn goto baseDirFound +cd .. +IF "%WDIR%"=="%CD%" goto baseDirNotFound +set WDIR=%CD% +goto findBaseDir + +:baseDirFound +set MAVEN_PROJECTBASEDIR=%WDIR% +cd "%EXEC_DIR%" +goto endDetectBaseDir + +:baseDirNotFound +set MAVEN_PROJECTBASEDIR=%EXEC_DIR% +cd "%EXEC_DIR%" + +:endDetectBaseDir + +IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig + +@setlocal EnableExtensions EnableDelayedExpansion +for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a +@endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS% + +:endReadAdditionalConfig + +SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe" + +set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar" +set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain + +%MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %* +if ERRORLEVEL 1 goto error +goto end + +:error +set ERROR_CODE=1 + +:end +@endlocal & set ERROR_CODE=%ERROR_CODE% + +if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost +@REM check for post script, once with legacy .bat ending and once with .cmd ending +if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat" +if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd" +:skipRcPost + +@REM pause the script if MAVEN_BATCH_PAUSE is set to 'on' +if "%MAVEN_BATCH_PAUSE%" == "on" pause + +if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE% + +exit /B %ERROR_CODE% diff --git a/java-stream/pom.xml b/java-stream/pom.xml new file mode 100644 index 0000000..ffaee72 --- /dev/null +++ b/java-stream/pom.xml @@ -0,0 +1,133 @@ + + + + 4.0.0 + + java-stream + javastream + 3.4.0 + + + UTF-8 + UTF-8 + 5.7.2 + + + + + universityOfManchester-graal + https://raw.githubusercontent.com/beehive-lab/tornado/maven-tornadovm + + + + + + + com.beust + jcommander + 1.81 + + + + tornado + tornado-api + 0.9 + + + + com.aparapi + aparapi + 2.0.0 + + + + org.scala-lang + scala-library + + + + + + org.junit.jupiter + junit-jupiter-engine + ${junit.version} + test + + + org.junit.jupiter + junit-jupiter-params + ${junit.version} + test + + + + + + + + maven-compiler-plugin + 3.8.1 + + 1.8 + 1.8 + -Xlint:all + true + true + + + + org.apache.maven.plugins + maven-surefire-plugin + 3.0.0-M5 + + + + + maven-shade-plugin + 3.2.4 + + + package + + shade + + + + + javastream.Main + + + + + *:* + + META-INF/*.MF + + + + ${project.artifactId} + + + + + + + com.coveo + fmt-maven-plugin + 2.9.1 + + + + format + + + + + + + + + \ No newline at end of file diff --git a/java-stream/src/main/java/javastream/FractionalMaths.java b/java-stream/src/main/java/javastream/FractionalMaths.java new file mode 100644 index 0000000..982a28a --- /dev/null +++ b/java-stream/src/main/java/javastream/FractionalMaths.java @@ -0,0 +1,45 @@ +package javastream; + +/** + * This class represents our Fractional typeclass. Java's type system isn't unified so we have to do + * insane things for parametric operations on fractional types. + */ +@SuppressWarnings("unchecked") +public final class FractionalMaths { + + private FractionalMaths() { + throw new AssertionError(); + } + + public static T from(Class evidence, Number n) { + if (evidence == Double.TYPE || evidence == Double.class) + return (T) Double.valueOf(n.doubleValue()); + else if (evidence == Float.TYPE || evidence == Float.class) + return (T) Float.valueOf(n.floatValue()); + throw new IllegalArgumentException(); + } + + public static T plus(T x, T y) { + if (x instanceof Double) return (T) Double.valueOf(x.doubleValue() + y.doubleValue()); + else if (x instanceof Float) return (T) Float.valueOf(x.floatValue() + y.floatValue()); + throw new IllegalArgumentException(); + } + + static T minus(T x, T y) { + if (x instanceof Double) return (T) Double.valueOf(x.doubleValue() - y.doubleValue()); + else if (x instanceof Float) return (T) Float.valueOf(x.floatValue() - y.floatValue()); + throw new IllegalArgumentException(); + } + + public static T times(T x, T y) { + if (x instanceof Double) return (T) Double.valueOf(x.doubleValue() * y.doubleValue()); + else if (x instanceof Float) return (T) Float.valueOf(x.floatValue() * y.floatValue()); + throw new IllegalArgumentException(); + } + + static T divide(T x, T y) { + if (x instanceof Double) return (T) Double.valueOf(x.doubleValue() / y.doubleValue()); + else if (x instanceof Float) return (T) Float.valueOf(x.floatValue() / y.floatValue()); + throw new IllegalArgumentException(); + } +} diff --git a/java-stream/src/main/java/javastream/JavaStream.java b/java-stream/src/main/java/javastream/JavaStream.java new file mode 100644 index 0000000..7ab96cb --- /dev/null +++ b/java-stream/src/main/java/javastream/JavaStream.java @@ -0,0 +1,172 @@ +package javastream; + +import java.time.Duration; +import java.util.AbstractMap; +import java.util.AbstractMap.SimpleImmutableEntry; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import javastream.Main.Config; + +public abstract class JavaStream { + + public static final class Data { + final T[] a, b, c; + + public Data(T[] a, T[] b, T[] c) { + this.a = Objects.requireNonNull(a); + this.b = Objects.requireNonNull(b); + this.c = Objects.requireNonNull(c); + } + } + + static final class Timings { + final List copy = new ArrayList<>(); + final List mul = new ArrayList<>(); + final List add = new ArrayList<>(); + final List triad = new ArrayList<>(); + final List dot = new ArrayList<>(); + } + + protected final Config config; + + protected JavaStream(Config config) { + this.config = config; + } + + protected abstract List listDevices(); + + protected abstract void initArrays(); + + protected abstract void copy(); + + protected abstract void mul(); + + protected abstract void add(); + + protected abstract void triad(); + + protected abstract void nstream(); + + protected abstract T dot(); + + protected abstract Data data(); + + public static class EnumeratedStream extends JavaStream { + + protected final JavaStream actual; + private final Entry, JavaStream>>[] options; + + @SafeVarargs + @SuppressWarnings("varargs") + public EnumeratedStream( + Config config, Entry, JavaStream>>... options) { + super(config); + this.actual = options[config.options.device].getValue().apply(config); + this.options = options; + } + + @Override + protected List listDevices() { + return Arrays.stream(options).map(Entry::getKey).collect(Collectors.toList()); + } + + @Override + public void initArrays() { + actual.initArrays(); + } + + @Override + public void copy() { + actual.copy(); + } + + @Override + public void mul() { + actual.mul(); + } + + @Override + public void add() { + actual.add(); + } + + @Override + public void triad() { + actual.triad(); + } + + @Override + public void nstream() { + actual.nstream(); + } + + @Override + public T dot() { + return actual.dot(); + } + + @Override + public Data data() { + return actual.data(); + } + } + + public static Double[] boxed(double[] xs) { + return Arrays.stream(xs).boxed().toArray(Double[]::new); + } + + public static Float[] boxed(float[] xs) { + return IntStream.range(0, xs.length).mapToObj(i -> xs[i]).toArray(Float[]::new); + } + + private static AbstractMap.SimpleImmutableEntry timed(Supplier f) { + long start = System.nanoTime(); + T r = f.get(); + long end = System.nanoTime(); + return new AbstractMap.SimpleImmutableEntry<>(Duration.ofNanos(end - start), r); + } + + private static Duration timed(Runnable f) { + long start = System.nanoTime(); + f.run(); + long end = System.nanoTime(); + return Duration.ofNanos(end - start); + } + + final SimpleImmutableEntry, T> runAll(int times) { + Timings timings = new Timings<>(); + T lastSum = null; + for (int i = 0; i < times; i++) { + timings.copy.add(timed(this::copy)); + timings.mul.add(timed(this::mul)); + timings.add.add(timed(this::add)); + timings.triad.add(timed(this::triad)); + SimpleImmutableEntry dot = timed(this::dot); + timings.dot.add(dot.getKey()); + lastSum = dot.getValue(); + } + return new SimpleImmutableEntry<>(timings, lastSum); + } + + final Duration runTriad(int times) { + return timed( + () -> { + for (int i = 0; i < times; i++) { + triad(); + } + }); + } + + final List runNStream(int times) { + return IntStream.range(0, times) + .mapToObj(i -> timed(this::nstream)) + .collect(Collectors.toList()); + } +} diff --git a/java-stream/src/main/java/javastream/Main.java b/java-stream/src/main/java/javastream/Main.java new file mode 100644 index 0000000..32b67a4 --- /dev/null +++ b/java-stream/src/main/java/javastream/Main.java @@ -0,0 +1,425 @@ +package javastream; + +import static javastream.FractionalMaths.divide; +import static javastream.FractionalMaths.from; +import static javastream.FractionalMaths.minus; +import static javastream.FractionalMaths.plus; +import static javastream.FractionalMaths.times; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import java.time.Duration; +import java.util.AbstractMap.SimpleImmutableEntry; +import java.util.Arrays; +import java.util.DoubleSummaryStatistics; +import java.util.List; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.stream.Collectors; +import javastream.JavaStream.Data; +import javastream.JavaStream.Timings; +import javastream.aparapi.AparapiStreams; +import javastream.jdk.JdkStreams; +import javastream.jdk.PlainStream; +import javastream.tornadovm.TornadoVMStreams; + +public class Main { + + enum Benchmark { + NSTREAM, + TRIAD, + ALL + } + + public static class Options { + + @Parameter(names = "--list", description = "List available devices for all implementations") + boolean list = false; + + @Parameter( + names = "--device", + description = "Select device at , see --list for options") + public int device = 0; + + @Parameter( + names = "--impl", + description = "Select implementation at , see --list for options") + public String impl = ""; + + @Parameter( + names = {"--numtimes", "-n"}, + description = "Run the test times (NUM >= 2)") + public int numtimes = 100; + + @Parameter( + names = {"--arraysize", "-s"}, + description = "Use elements in the array") + public int arraysize = 33554432; + + @Parameter(names = "--float", description = "Use floats (rather than doubles)") + public boolean useFloat = false; + + @Parameter(names = "--triad-only", description = "Only run triad") + public boolean triadOnly = false; + + @Parameter(names = "--nstream-only", description = "Only run nstream") + public boolean nstreamOnly = false; + + @Parameter(names = "--csv", description = "Output as csv table") + public boolean csv = false; + + @Parameter( + names = "--mibibytes", + description = "Use MiB=2^20 for bandwidth calculation (default MB=10^6)") + public boolean mibibytes = false; + + @Parameter(names = "--dot-tolerance", description = "Tolerance for dot kernel verification") + public double dotTolerance = 1.0e-8; + + public boolean isVerboseBenchmark() { + return !list && !csv; + } + } + + public static final class Config { + public final Options options; + public final Benchmark benchmark; + public final int typeSize; + public final Class evidence; + public final T ulp, scalar, initA, initB, initC; + + public Config( + Options options, + Benchmark benchmark, + int typeSize, + Class evidence, + T ulp, + T scalar, + T initA, + T initB, + T initC) { + this.options = Objects.requireNonNull(options); + this.benchmark = Objects.requireNonNull(benchmark); + this.typeSize = typeSize; + this.evidence = Objects.requireNonNull(evidence); + this.ulp = Objects.requireNonNull(ulp); + this.scalar = Objects.requireNonNull(scalar); + this.initA = Objects.requireNonNull(initA); + this.initB = Objects.requireNonNull(initB); + this.initC = Objects.requireNonNull(initC); + } + } + + static final class Implementation { + final String name; + final Function, JavaStream> makeFloat; + final Function, JavaStream> makeDouble; + + Implementation( + String name, + Function, JavaStream> makeFloat, + Function, JavaStream> makeDouble) { + this.name = Objects.requireNonNull(name); + this.makeFloat = Objects.requireNonNull(makeFloat); + this.makeDouble = Objects.requireNonNull(makeDouble); + } + } + + static boolean run( + String name, Config config, Function, JavaStream> mkStream) { + + Options opt = config.options; + + int arrayBytes = opt.arraysize * config.typeSize; + int totalBytes = arrayBytes * 3; + + String megaSuffix = opt.mibibytes ? "MiB" : "MB"; + String gigaSuffix = opt.mibibytes ? "GiB" : "GB"; + + double megaScale = opt.mibibytes ? Math.pow(2.0, -20) : 1.0e-6; + double gigaScale = opt.mibibytes ? Math.pow(2.0, -30) : 1.0e-9; + + if (!opt.csv) { + + String vendor = System.getProperty("java.vendor"); + String ver = System.getProperty("java.version"); + String home = System.getProperty("java.home"); + + System.out.println("BabelStream"); + System.out.printf("Version: %s%n", VERSION); + System.out.printf( + "Implementation: %s (Java %s; %s; JAVA_HOME=%s)%n", name, ver, vendor, home); + final String benchmarkName; + switch (config.benchmark) { + case NSTREAM: + benchmarkName = "nstream"; + break; + case TRIAD: + benchmarkName = "triad"; + break; + case ALL: + benchmarkName = "all"; + break; + default: + throw new AssertionError("Unexpected value: " + config.benchmark); + } + System.out.println("Running " + benchmarkName + " " + opt.numtimes + " times"); + + if (config.benchmark == Benchmark.TRIAD) { + System.out.println("Number of elements: " + opt.arraysize); + } + + System.out.println("Precision: " + (opt.useFloat ? "float" : "double")); + System.out.printf( + "Array size: %.1f %s (=%.1f %s)%n", + (megaScale * arrayBytes), megaSuffix, (gigaScale * arrayBytes), gigaSuffix); + System.out.printf( + "Total size: %.1f %s (=%.1f %s)%n", + (megaScale * totalBytes), megaSuffix, (gigaScale * totalBytes), gigaSuffix); + } + + JavaStream stream = mkStream.apply(config); + + stream.initArrays(); + + final boolean ok; + switch (config.benchmark) { + case ALL: + Entry, T> results = stream.runAll(opt.numtimes); + ok = checkSolutions(stream.data(), config, Optional.of(results.getValue())); + Timings timings = results.getKey(); + tabulateCsv( + opt.csv, + mkCsvRow(timings.copy, "Copy", 2 * arrayBytes, megaScale, opt), + mkCsvRow(timings.mul, "Mul", 2 * arrayBytes, megaScale, opt), + mkCsvRow(timings.add, "Add", 3 * arrayBytes, megaScale, opt), + mkCsvRow(timings.triad, "Triad", 3 * arrayBytes, megaScale, opt), + mkCsvRow(timings.dot, "Dot", 2 * arrayBytes, megaScale, opt)); + break; + case NSTREAM: + List nstreamResults = stream.runNStream(opt.numtimes); + ok = checkSolutions(stream.data(), config, Optional.empty()); + tabulateCsv(opt.csv, mkCsvRow(nstreamResults, "Nstream", 4 * arrayBytes, megaScale, opt)); + break; + case TRIAD: + Duration triadResult = stream.runTriad(opt.numtimes); + ok = checkSolutions(stream.data(), config, Optional.empty()); + int triadTotalBytes = 3 * arrayBytes * opt.numtimes; + double bandwidth = megaScale * (triadTotalBytes / durationToSeconds(triadResult)); + System.out.printf("Runtime (seconds): %.5f", durationToSeconds(triadResult)); + System.out.printf("Bandwidth (%s/s): %.3f ", gigaSuffix, bandwidth); + break; + default: + throw new AssertionError(); + } + return ok; + } + + private static boolean checkWithinTolerance( + String name, T[] xs, T gold, T tolerance) { + // it's ok to default to double for error calculation + double error = + Arrays.stream(xs) + .mapToDouble(x -> Math.abs(minus(x, gold).doubleValue())) + .summaryStatistics() + .getAverage(); + boolean failed = error > tolerance.doubleValue(); + if (failed) { + System.err.printf("Validation failed on %s. Average error %s%n", name, error); + } + return !failed; + } + + @SuppressWarnings("OptionalUsedAsFieldOrParameterType") + static boolean checkSolutions( + Data data, Config config, Optional dotSum) { + T goldA = config.initA; + T goldB = config.initB; + T goldC = config.initC; + + for (int i = 0; i < config.options.numtimes; i++) { + switch (config.benchmark) { + case ALL: + goldC = goldA; + goldB = times(config.scalar, goldC); + goldC = plus(goldA, goldB); + goldA = plus(goldB, times(config.scalar, goldC)); + break; + case TRIAD: + goldA = plus(goldB, times(config.scalar, goldC)); + break; + case NSTREAM: + goldA = plus(goldA, plus(goldB, times(config.scalar, goldC))); + break; + } + } + + T tolerance = times(config.ulp, from(config.evidence, 100)); + boolean aValid = checkWithinTolerance("a", data.a, goldA, tolerance); + boolean bValid = checkWithinTolerance("b", data.b, goldB, tolerance); + boolean cValid = checkWithinTolerance("c", data.c, goldC, tolerance); + + final T finalGoldA = goldA; + final T finalGoldB = goldB; + boolean sumValid = + dotSum + .map( + actual -> { + T goldSum = + times( + times(finalGoldA, finalGoldB), + from(config.evidence, config.options.arraysize)); + double error = Math.abs(divide(minus(actual, goldSum), goldSum).doubleValue()); + boolean failed = error > config.options.dotTolerance; + if (failed) { + System.err.printf( + "Validation failed on sum. Error %s \nSum was %s but should be %s%n", + error, actual, goldSum); + } + return !failed; + }) + .orElse(true); + + return aValid && bValid && cValid && sumValid; + } + + private static double durationToSeconds(Duration d) { + return d.toNanos() / (double) TimeUnit.SECONDS.toNanos(1); + } + + private static List> mkCsvRow( + List xs, String name, int totalBytes, double megaScale, Options opt) { + DoubleSummaryStatistics stats = + xs.stream().skip(1).mapToDouble(Main::durationToSeconds).summaryStatistics(); + if (stats.getCount() <= 0) { + throw new IllegalArgumentException("No min/max for " + name + "(size=" + totalBytes + ")"); + } + double mbps = megaScale * (double) totalBytes / stats.getMin(); + return opt.csv + ? Arrays.asList( + new SimpleImmutableEntry<>("function", name), + new SimpleImmutableEntry<>("num_times", opt.numtimes + ""), + new SimpleImmutableEntry<>("n_elements", opt.arraysize + ""), + new SimpleImmutableEntry<>("sizeof", totalBytes + ""), + new SimpleImmutableEntry<>( + "max_m" + (opt.mibibytes ? "i" : "") + "bytes_per_sec", mbps + ""), + new SimpleImmutableEntry<>("min_runtime", stats.getMin() + ""), + new SimpleImmutableEntry<>("max_runtime", stats.getMax() + ""), + new SimpleImmutableEntry<>("avg_runtime", stats.getAverage() + "")) + : Arrays.asList( + new SimpleImmutableEntry<>("Function", name), + new SimpleImmutableEntry<>( + "M" + (opt.mibibytes ? "i" : "") + "Bytes/sec", String.format("%.3f", mbps)), + new SimpleImmutableEntry<>("Min (sec)", String.format("%.5f", stats.getMin())), + new SimpleImmutableEntry<>("Max", String.format("%.5f", stats.getMax())), + new SimpleImmutableEntry<>("Average", String.format("%.5f", stats.getAverage()))); + } + + private static String padSpace(String s, int length) { + if (length == 0) return s; + return String.format("%1$-" + length + "s", s); + } + + @SafeVarargs + @SuppressWarnings("varargs") + private static void tabulateCsv(boolean csv, List>... rows) { + if (rows.length == 0) throw new IllegalArgumentException("Empty tabulation"); + int padding = csv ? 0 : 12; + String sep = csv ? "," : ""; + System.out.println( + rows[0].stream().map(x -> padSpace(x.getKey(), padding)).collect(Collectors.joining(sep))); + for (List> row : rows) { + System.out.println( + row.stream().map(x -> padSpace(x.getValue(), padding)).collect(Collectors.joining(sep))); + } + } + + private static final String VERSION = "3.4"; + + private static final float START_SCALAR = 0.4f; + private static final float START_A = 0.1f; + private static final float START_B = 0.2f; + private static final float START_C = 0.0f; + + private static final List IMPLEMENTATIONS = + Arrays.asList( + new Implementation("jdk-stream", JdkStreams.FLOAT, JdkStreams.DOUBLE), + new Implementation("jdk-plain", PlainStream.FLOAT, PlainStream.DOUBLE), + new Implementation("tornadovm", TornadoVMStreams.FLOAT, TornadoVMStreams.DOUBLE), + new Implementation("aparapi", AparapiStreams.FLOAT, AparapiStreams.DOUBLE)); + + public static int run(String[] args) { + Options opt = new Options(); + JCommander.newBuilder().addObject(opt).build().parse(args); + + final Benchmark benchmark; + if (opt.nstreamOnly && opt.triadOnly) + throw new RuntimeException( + "Both triad and nstream are enabled, pick one or omit both to run all benchmarks"); + else if (opt.nstreamOnly) benchmark = Benchmark.NSTREAM; + else if (opt.triadOnly) benchmark = Benchmark.TRIAD; + else benchmark = Benchmark.ALL; + + final Config floatConfig = + new Config<>( + opt, + benchmark, + Float.BYTES, + Float.class, // XXX not Float.TYPE, we want the boxed one + Math.ulp(1.f), + START_SCALAR, + START_A, + START_B, + START_C); + final Config doubleConfig = + new Config<>( + opt, + benchmark, + Double.BYTES, + Double.class, // XXX not Double.TYPE, we want the boxed one + Math.ulp(1.d), + (double) START_SCALAR, + (double) START_A, + (double) START_B, + (double) START_C); + + if (opt.list) { + System.out.println("Set implementation with --impl and device with --device :"); + for (Implementation entry : IMPLEMENTATIONS) { + System.out.println("Implementation: " + entry.name); + try { + List devices = entry.makeDouble.apply(doubleConfig).listDevices(); + for (int i = 0; i < devices.size(); i++) { + System.out.println("\t[" + i + "] " + devices.get(i)); + } + } catch (Exception e) { + System.out.println("\t(Unsupported: " + e.getMessage() + ")"); + } + } + return 0; + } + + String implName = (opt.impl.isEmpty()) ? IMPLEMENTATIONS.get(0).name : opt.impl; + Implementation impl = + IMPLEMENTATIONS.stream() + .filter(x -> implName.compareToIgnoreCase(x.name) == 0) + .findFirst() + .orElseThrow( + () -> + new IllegalArgumentException("Implementation " + opt.impl + " does not exist")); + + boolean ok = + opt.useFloat + ? run(impl.name, floatConfig, impl.makeFloat) + : run(impl.name, doubleConfig, impl.makeDouble); + + return ok ? 0 : 1; + } + + public static void main(String[] args) { + System.exit(run(args)); + } +} diff --git a/java-stream/src/main/java/javastream/aparapi/AparapiStreams.java b/java-stream/src/main/java/javastream/aparapi/AparapiStreams.java new file mode 100644 index 0000000..ab2de52 --- /dev/null +++ b/java-stream/src/main/java/javastream/aparapi/AparapiStreams.java @@ -0,0 +1,129 @@ +package javastream.aparapi; + +import com.aparapi.device.Device; +import com.aparapi.device.Device.TYPE; +import com.aparapi.device.JavaDevice; +import com.aparapi.device.OpenCLDevice; +import com.aparapi.internal.kernel.KernelManager; +import java.util.Collection; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javastream.JavaStream; +import javastream.Main.Config; + +public final class AparapiStreams { + + private AparapiStreams() {} + + public static final Function, JavaStream> DOUBLE = + config -> new Generic<>(config, SpecialisedDoubleKernel::new); + + public static final Function, JavaStream> FLOAT = + config -> new Generic<>(config, SpecialisedFloatKernel::new); + + private static List enumerateDevices() { + + // JavaDevice.SEQUENTIAL doesn't work when arraysize > 1, so we omit it entirely + Stream cpuDevices = Stream.of(JavaDevice.ALTERNATIVE_ALGORITHM); + + Stream clDevices = + Stream.of(TYPE.values()).map(OpenCLDevice::listDevices).flatMap(Collection::stream); + + return Stream.concat(clDevices, cpuDevices).collect(Collectors.toList()); + } + + private static String deviceName(Device device) { + return device.toString(); + } + + private static final class Generic extends JavaStream { + + private final GenericAparapiStreamKernel kernels; + + Generic(Config config, GenericAparapiStreamKernel.Factory factory) { + super(config); + Device device = enumerateDevices().get(config.options.device); + + final int numGroups; + final int workGroupSize; + if (device instanceof JavaDevice) { + numGroups = Runtime.getRuntime().availableProcessors(); + workGroupSize = + config.typeSize * 2; // closest thing to CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE + + } else if (device instanceof OpenCLDevice) { + numGroups = ((OpenCLDevice) device).getMaxComputeUnits(); + workGroupSize = device.getMaxWorkGroupSize(); + } else { + throw new AssertionError("Unknown device type " + device.getClass()); + } + + if (config.options.isVerboseBenchmark()) { + System.out.println("Using Aparapi OpenCL device: " + device); + System.out.println(" - numGroups : " + numGroups); + System.out.println(" - workGroupSize : " + workGroupSize); + String showCL = System.getProperty("com.aparapi.enableShowGeneratedOpenCL"); + if (showCL == null || !showCL.equals("true")) { + System.out.println( + "(Add `-Dcom.aparapi.enableShowGeneratedOpenCL=true` to show generated OpenCL source)"); + } + } + + LinkedHashSet candidate = new LinkedHashSet<>(); + candidate.add(device); + + kernels = factory.create(config, numGroups, workGroupSize); + KernelManager.instance().setPreferredDevices(kernels, candidate); + } + + @Override + public List listDevices() { + return enumerateDevices().stream() + .map(AparapiStreams::deviceName) + .collect(Collectors.toList()); + } + + @Override + public void initArrays() { + kernels.init(); + } + + @Override + public void copy() { + kernels.copy(); + } + + @Override + public void mul() { + kernels.mul(); + } + + @Override + public void add() { + kernels.add(); + } + + @Override + public void triad() { + kernels.triad(); + } + + @Override + public void nstream() { + kernels.nstream(); + } + + @Override + public T dot() { + return kernels.dot(); + } + + @Override + public Data data() { + return kernels.syncAndDispose(); + } + } +} diff --git a/java-stream/src/main/java/javastream/aparapi/GenericAparapiStreamKernel.java b/java-stream/src/main/java/javastream/aparapi/GenericAparapiStreamKernel.java new file mode 100644 index 0000000..526b472 --- /dev/null +++ b/java-stream/src/main/java/javastream/aparapi/GenericAparapiStreamKernel.java @@ -0,0 +1,68 @@ +package javastream.aparapi; + +import com.aparapi.Kernel; +import com.aparapi.Range; +import javastream.JavaStream.Data; +import javastream.Main.Config; + +abstract class GenericAparapiStreamKernel extends Kernel { + + protected static final int FN_COPY = 1; + protected static final int FN_MUL = 2; + protected static final int FN_ADD = 3; + protected static final int FN_TRIAD = 4; + protected static final int FN_NSTREAM = 5; + protected static final int FN_DOT = 6; + protected final Config config; + protected final int arraysize, numGroups, workGroupSize; + + interface Factory { + GenericAparapiStreamKernel create(Config config, int numGroups, int workGroupSize); + } + + GenericAparapiStreamKernel(Config config, int numGroups, int workGroupSize) { + this.config = config; + this.arraysize = config.options.arraysize; + this.numGroups = numGroups; + this.workGroupSize = workGroupSize; + setExplicit(true); + } + + protected int function; + + public abstract void init(); + + public void copy() { + function = FN_COPY; + execute(arraysize); + } + + public void mul() { + function = FN_MUL; + execute(arraysize); + } + + public void add() { + function = FN_ADD; + execute(arraysize); + } + + public void triad() { + function = FN_TRIAD; + execute(arraysize); + } + + public void nstream() { + function = FN_NSTREAM; + execute(arraysize); + } + + protected Kernel partialDot() { + function = FN_DOT; + return execute(Range.create(numGroups * workGroupSize, workGroupSize)); + } + + abstract T dot(); + + abstract Data syncAndDispose(); +} diff --git a/java-stream/src/main/java/javastream/aparapi/SpecialisedDoubleKernel.java b/java-stream/src/main/java/javastream/aparapi/SpecialisedDoubleKernel.java new file mode 100644 index 0000000..56a59af --- /dev/null +++ b/java-stream/src/main/java/javastream/aparapi/SpecialisedDoubleKernel.java @@ -0,0 +1,74 @@ +package javastream.aparapi; + +import java.util.Arrays; +import javastream.JavaStream; +import javastream.JavaStream.Data; +import javastream.Main.Config; + +final class SpecialisedDoubleKernel extends GenericAparapiStreamKernel { + private final double scalar; + final double[] a, b, c; + private final double[] partialSum; + @Local private final double[] workGroupSum; + + SpecialisedDoubleKernel(Config config, int numGroups, int workGroupSize) { + super(config, numGroups, workGroupSize); + this.scalar = config.scalar; + this.a = new double[this.arraysize]; + this.b = new double[this.arraysize]; + this.c = new double[this.arraysize]; + + this.partialSum = new double[numGroups]; + this.workGroupSum = new double[workGroupSize]; + } + + @SuppressWarnings("DuplicatedCode") + @Override + public void run() { + int i = getGlobalId(); + if (function == FN_COPY) { + c[i] = a[i]; + } else if (function == FN_MUL) { + b[i] = scalar * c[i]; + } else if (function == FN_ADD) { + c[i] = a[i] + b[i]; + } else if (function == FN_TRIAD) { + a[i] = b[i] + scalar * c[i]; + } else if (function == FN_NSTREAM) { + a[i] += b[i] + scalar * c[i]; + } else if (function == FN_DOT) { + int localId = getLocalId(0); + workGroupSum[localId] = 0.0; + for (; i < arraysize; i += getGlobalSize(0)) workGroupSum[localId] += a[i] * b[i]; + for (int offset = getLocalSize(0) / 2; offset > 0; offset /= 2) { + localBarrier(); + if (localId < offset) { + workGroupSum[localId] += workGroupSum[localId + offset]; + } + } + if (localId == 0) partialSum[getGroupId(0)] = workGroupSum[localId]; + } + } + + @Override + public void init() { + Arrays.fill(a, config.initA); + Arrays.fill(b, config.initB); + Arrays.fill(c, config.initC); + put(a).put(b).put(c); + } + + @Override + public Double dot() { + partialDot().get(partialSum); + double sum = 0; + for (double v : partialSum) sum += v; + return sum; + } + + @Override + public Data syncAndDispose() { + get(a).get(b).get(c).dispose(); + return new Data<>(JavaStream.boxed(a), JavaStream.boxed(b), JavaStream.boxed(c)); + } +} diff --git a/java-stream/src/main/java/javastream/aparapi/SpecialisedFloatKernel.java b/java-stream/src/main/java/javastream/aparapi/SpecialisedFloatKernel.java new file mode 100644 index 0000000..6919f06 --- /dev/null +++ b/java-stream/src/main/java/javastream/aparapi/SpecialisedFloatKernel.java @@ -0,0 +1,75 @@ +package javastream.aparapi; + +import static javastream.JavaStream.boxed; + +import java.util.Arrays; +import javastream.JavaStream.Data; +import javastream.Main.Config; + +final class SpecialisedFloatKernel extends GenericAparapiStreamKernel { + private final float scalar; + final float[] a, b, c; + private final float[] partialSum; + @Local private final float[] workGroupSum; + + SpecialisedFloatKernel(Config config, int numGroups, int workGroupSize) { + super(config, numGroups, workGroupSize); + this.scalar = config.scalar; + this.a = new float[this.arraysize]; + this.b = new float[this.arraysize]; + this.c = new float[this.arraysize]; + + this.partialSum = new float[numGroups]; + this.workGroupSum = new float[workGroupSize]; + } + + @SuppressWarnings("DuplicatedCode") + @Override + public void run() { + int i = getGlobalId(); + if (function == FN_COPY) { + c[i] = a[i]; + } else if (function == FN_MUL) { + b[i] = scalar * c[i]; + } else if (function == FN_ADD) { + c[i] = a[i] + b[i]; + } else if (function == FN_TRIAD) { + a[i] = b[i] + scalar * c[i]; + } else if (function == FN_NSTREAM) { + a[i] += b[i] + scalar * c[i]; + } else if (function == FN_DOT) { + int localId = getLocalId(0); + workGroupSum[localId] = 0.f; + for (; i < arraysize; i += getGlobalSize(0)) workGroupSum[localId] += a[i] * b[i]; + for (int offset = getLocalSize(0) / 2; offset > 0; offset /= 2) { + localBarrier(); + if (localId < offset) { + workGroupSum[localId] += workGroupSum[localId + offset]; + } + } + if (localId == 0) partialSum[getGroupId(0)] = workGroupSum[localId]; + } + } + + @Override + public void init() { + Arrays.fill(a, config.initA); + Arrays.fill(b, config.initB); + Arrays.fill(c, config.initC); + put(a).put(b).put(c); + } + + @Override + public Float dot() { + partialDot().get(partialSum); + float sum = 0; + for (float v : partialSum) sum += v; + return sum; + } + + @Override + public Data syncAndDispose() { + get(a).get(b).get(c).dispose(); + return new Data<>(boxed(a), boxed(b), boxed(c)); + } +} diff --git a/java-stream/src/main/java/javastream/jdk/GenericPlainStream.java b/java-stream/src/main/java/javastream/jdk/GenericPlainStream.java new file mode 100644 index 0000000..7f210fa --- /dev/null +++ b/java-stream/src/main/java/javastream/jdk/GenericPlainStream.java @@ -0,0 +1,92 @@ +package javastream.jdk; + +import static javastream.FractionalMaths.from; +import static javastream.FractionalMaths.plus; +import static javastream.FractionalMaths.times; + +import java.lang.reflect.Array; +import java.util.Collections; +import java.util.List; +import javastream.JavaStream; +import javastream.Main.Config; + +final class GenericPlainStream extends JavaStream { + + private final T[] a; + private final T[] b; + private final T[] c; + + @SuppressWarnings("unchecked") + GenericPlainStream(Config config) { + super(config); + this.a = (T[]) Array.newInstance(config.evidence, config.options.arraysize); + this.b = (T[]) Array.newInstance(config.evidence, config.options.arraysize); + this.c = (T[]) Array.newInstance(config.evidence, config.options.arraysize); + } + + @Override + public List listDevices() { + return Collections.singletonList("JVM"); + } + + @Override + public void initArrays() { + for (int i = 0; i < config.options.arraysize; i++) { + a[i] = config.initA; + b[i] = config.initB; + c[i] = config.initC; + } + } + + @SuppressWarnings("ManualArrayCopy") + @Override + public void copy() { + for (int i = 0; i < config.options.arraysize; i++) { + c[i] = a[i]; + } + } + + @Override + public void mul() { + for (int i = 0; i < config.options.arraysize; i++) { + b[i] = times(config.scalar, c[i]); + } + } + + @Override + public void add() { + + for (int i = 0; i < config.options.arraysize; i++) { + c[i] = plus(a[i], b[i]); + } + } + + @Override + public void triad() { + + for (int i = 0; i < config.options.arraysize; i++) { + a[i] = plus(b[i], times(config.scalar, c[i])); + } + } + + @Override + public void nstream() { + for (int i = 0; i < config.options.arraysize; i++) { + a[i] = plus(a[i], plus(b[i], times(config.scalar, c[i]))); + } + } + + @Override + public T dot() { + T acc = from(config.evidence, 0); + for (int i = 0; i < config.options.arraysize; i++) { + acc = plus(acc, times(a[i], b[i])); + } + return acc; + } + + @Override + public Data data() { + return new Data<>(a, b, c); + } +} diff --git a/java-stream/src/main/java/javastream/jdk/GenericStream.java b/java-stream/src/main/java/javastream/jdk/GenericStream.java new file mode 100644 index 0000000..1e65b8f --- /dev/null +++ b/java-stream/src/main/java/javastream/jdk/GenericStream.java @@ -0,0 +1,86 @@ +package javastream.jdk; + +import static javastream.FractionalMaths.from; +import static javastream.FractionalMaths.plus; +import static javastream.FractionalMaths.times; + +import java.lang.reflect.Array; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.IntStream; +import javastream.FractionalMaths; +import javastream.JavaStream; +import javastream.Main.Config; + +/** + * We use + * + *
Arrays.parallelSetAll
+ * + *

here as it internally calls + * + *

IntStream.range(0, array.length).parallel().forEach(...)
+ */ +final class GenericStream extends JavaStream { + + private final T[] a, b, c; + + @SuppressWarnings("unchecked") + GenericStream(Config config) { + super(config); + this.a = (T[]) Array.newInstance(config.evidence, config.options.arraysize); + this.b = (T[]) Array.newInstance(config.evidence, config.options.arraysize); + this.c = (T[]) Array.newInstance(config.evidence, config.options.arraysize); + } + + @Override + public List listDevices() { + return Collections.singletonList("JVM"); + } + + @Override + public void initArrays() { + Arrays.parallelSetAll(a, i -> config.initA); + Arrays.parallelSetAll(b, i -> config.initB); + Arrays.parallelSetAll(c, i -> config.initC); + } + + @Override + public void copy() { + Arrays.parallelSetAll(c, i -> a[i]); + } + + @Override + public void mul() { + Arrays.parallelSetAll(b, i -> times(config.scalar, c[i])); + } + + @Override + public void add() { + Arrays.parallelSetAll(c, i -> plus(a[i], b[i])); + } + + @Override + public void triad() { + Arrays.parallelSetAll(a, i -> plus(b[i], times(config.scalar, c[i]))); + } + + @Override + public void nstream() { + Arrays.parallelSetAll(a, i -> plus(a[i], plus(b[i], times(config.scalar, c[i])))); + } + + @Override + public T dot() { + return IntStream.range(0, config.options.arraysize) + .parallel() + .mapToObj(i -> times(a[i], b[i])) + .reduce(from(config.evidence, 0), FractionalMaths::plus); + } + + @Override + public Data data() { + return new Data<>(a, b, c); + } +} diff --git a/java-stream/src/main/java/javastream/jdk/JdkStreams.java b/java-stream/src/main/java/javastream/jdk/JdkStreams.java new file mode 100644 index 0000000..5b58be7 --- /dev/null +++ b/java-stream/src/main/java/javastream/jdk/JdkStreams.java @@ -0,0 +1,26 @@ +package javastream.jdk; + +import java.util.AbstractMap.SimpleImmutableEntry; +import java.util.function.Function; +import javastream.JavaStream; +import javastream.JavaStream.EnumeratedStream; +import javastream.Main.Config; + +public final class JdkStreams { + + private JdkStreams() {} + + public static final Function, JavaStream> FLOAT = + config -> + new EnumeratedStream<>( + config, + new SimpleImmutableEntry<>("specialised", SpecialisedFloatStream::new), + new SimpleImmutableEntry<>("generic", GenericStream::new)); + + public static final Function, JavaStream> DOUBLE = + config -> + new EnumeratedStream<>( + config, + new SimpleImmutableEntry<>("specialised", SpecialisedDoubleStream::new), + new SimpleImmutableEntry<>("generic", GenericStream::new)); +} diff --git a/java-stream/src/main/java/javastream/jdk/PlainStream.java b/java-stream/src/main/java/javastream/jdk/PlainStream.java new file mode 100644 index 0000000..f9281e8 --- /dev/null +++ b/java-stream/src/main/java/javastream/jdk/PlainStream.java @@ -0,0 +1,26 @@ +package javastream.jdk; + +import java.util.AbstractMap.SimpleImmutableEntry; +import java.util.function.Function; +import javastream.JavaStream; +import javastream.JavaStream.EnumeratedStream; +import javastream.Main.Config; + +public final class PlainStream { + + private PlainStream() {} + + public static final Function, JavaStream> FLOAT = + config -> + new EnumeratedStream<>( + config, + new SimpleImmutableEntry<>("specialised", SpecialisedPlainFloatStream::new), + new SimpleImmutableEntry<>("generic", GenericPlainStream::new)); + + public static final Function, JavaStream> DOUBLE = + config -> + new EnumeratedStream<>( + config, + new SimpleImmutableEntry<>("specialised", SpecialisedPlainDoubleStream::new), + new SimpleImmutableEntry<>("generic", GenericPlainStream::new)); +} diff --git a/java-stream/src/main/java/javastream/jdk/SpecialisedDoubleStream.java b/java-stream/src/main/java/javastream/jdk/SpecialisedDoubleStream.java new file mode 100644 index 0000000..26406a6 --- /dev/null +++ b/java-stream/src/main/java/javastream/jdk/SpecialisedDoubleStream.java @@ -0,0 +1,84 @@ +package javastream.jdk; + +import java.util.Collections; +import java.util.List; +import java.util.stream.IntStream; +import javastream.JavaStream; +import javastream.Main.Config; + +final class SpecialisedDoubleStream extends JavaStream { + + private final double[] a, b, c; + + SpecialisedDoubleStream(Config config) { + super(config); + this.a = new double[config.options.arraysize]; + this.b = new double[config.options.arraysize]; + this.c = new double[config.options.arraysize]; + } + + @Override + public List listDevices() { + return Collections.singletonList("JVM"); + } + + @Override + public void initArrays() { + IntStream.range(0, config.options.arraysize) // + .parallel() + .forEach( + i -> { + a[i] = config.initA; + b[i] = config.initB; + c[i] = config.initC; + }); + } + + @Override + public void copy() { + IntStream.range(0, config.options.arraysize) // + .parallel() + .forEach(i -> c[i] = a[i]); + } + + @Override + public void mul() { + IntStream.range(0, config.options.arraysize) // + .parallel() + .forEach(i -> b[i] = config.scalar * c[i]); + } + + @Override + public void add() { + IntStream.range(0, config.options.arraysize) // + .parallel() + .forEach(i -> c[i] = a[i] + b[i]); + } + + @Override + public void triad() { + IntStream.range(0, config.options.arraysize) // + .parallel() + .forEach(i -> a[i] = b[i] + config.scalar * c[i]); + } + + @Override + public void nstream() { + IntStream.range(0, config.options.arraysize) // + .parallel() + .forEach(i -> a[i] += b[i] + config.scalar * c[i]); + } + + @Override + public Double dot() { + return IntStream.range(0, config.options.arraysize) + .parallel() + .mapToDouble(i -> a[i] * b[i]) + .reduce(0f, Double::sum); + } + + @Override + public Data data() { + return new Data<>(boxed(a), boxed(b), boxed(c)); + } +} diff --git a/java-stream/src/main/java/javastream/jdk/SpecialisedFloatStream.java b/java-stream/src/main/java/javastream/jdk/SpecialisedFloatStream.java new file mode 100644 index 0000000..6c414c1 --- /dev/null +++ b/java-stream/src/main/java/javastream/jdk/SpecialisedFloatStream.java @@ -0,0 +1,84 @@ +package javastream.jdk; + +import java.util.Collections; +import java.util.List; +import java.util.stream.IntStream; +import javastream.JavaStream; +import javastream.Main.Config; + +final class SpecialisedFloatStream extends JavaStream { + + private final float[] a, b, c; + + SpecialisedFloatStream(Config config) { + super(config); + this.a = new float[config.options.arraysize]; + this.b = new float[config.options.arraysize]; + this.c = new float[config.options.arraysize]; + } + + @Override + public List listDevices() { + return Collections.singletonList("JVM"); + } + + @Override + public void initArrays() { + IntStream.range(0, config.options.arraysize) // + .parallel() + .forEach( + i -> { + a[i] = config.initA; + b[i] = config.initB; + c[i] = config.initC; + }); + } + + @Override + public void copy() { + IntStream.range(0, config.options.arraysize) // + .parallel() + .forEach(i -> c[i] = a[i]); + } + + @Override + public void mul() { + IntStream.range(0, config.options.arraysize) // + .parallel() + .forEach(i -> b[i] = config.scalar * c[i]); + } + + @Override + public void add() { + IntStream.range(0, config.options.arraysize) // + .parallel() + .forEach(i -> c[i] = a[i] + b[i]); + } + + @Override + public void triad() { + IntStream.range(0, config.options.arraysize) // + .parallel() + .forEach(i -> a[i] = b[i] + config.scalar * c[i]); + } + + @Override + public void nstream() { + IntStream.range(0, config.options.arraysize) // + .parallel() + .forEach(i -> a[i] += b[i] + config.scalar * c[i]); + } + + @Override + public Float dot() { + return IntStream.range(0, config.options.arraysize) // + .parallel() + .mapToObj(i -> a[i] * b[i]) // XXX there isn't a specialised Stream for floats + .reduce(0f, Float::sum); + } + + @Override + public Data data() { + return new Data<>(boxed(a), boxed(b), boxed(c)); + } +} diff --git a/java-stream/src/main/java/javastream/jdk/SpecialisedPlainDoubleStream.java b/java-stream/src/main/java/javastream/jdk/SpecialisedPlainDoubleStream.java new file mode 100644 index 0000000..afda2ef --- /dev/null +++ b/java-stream/src/main/java/javastream/jdk/SpecialisedPlainDoubleStream.java @@ -0,0 +1,84 @@ +package javastream.jdk; + +import java.util.Collections; +import java.util.List; +import javastream.JavaStream; +import javastream.Main.Config; + +final class SpecialisedPlainDoubleStream extends JavaStream { + + private final double[] a; + private final double[] b; + private final double[] c; + + SpecialisedPlainDoubleStream(Config config) { + super(config); + this.a = new double[config.options.arraysize]; + this.b = new double[config.options.arraysize]; + this.c = new double[config.options.arraysize]; + } + + @Override + public List listDevices() { + return Collections.singletonList("JVM"); + } + + @Override + public void initArrays() { + for (int i = 0; i < config.options.arraysize; i++) { + a[i] = config.initA; + b[i] = config.initB; + c[i] = config.initC; + } + } + + @SuppressWarnings("ManualArrayCopy") + @Override + public void copy() { + for (int i = 0; i < config.options.arraysize; i++) { + c[i] = a[i]; + } + } + + @Override + public void mul() { + for (int i = 0; i < config.options.arraysize; i++) { + b[i] = config.scalar * c[i]; + } + } + + @Override + public void add() { + for (int i = 0; i < config.options.arraysize; i++) { + c[i] = a[i] + b[i]; + } + } + + @Override + public void triad() { + for (int i = 0; i < config.options.arraysize; i++) { + a[i] = b[i] + config.scalar * c[i]; + } + } + + @Override + public void nstream() { + for (int i = 0; i < config.options.arraysize; i++) { + a[i] += b[i] + config.scalar * c[i]; + } + } + + @Override + public Double dot() { + double acc = 0f; + for (int i = 0; i < config.options.arraysize; i++) { + acc += a[i] * b[i]; + } + return acc; + } + + @Override + public Data data() { + return new Data<>(boxed(a), boxed(b), boxed(c)); + } +} diff --git a/java-stream/src/main/java/javastream/jdk/SpecialisedPlainFloatStream.java b/java-stream/src/main/java/javastream/jdk/SpecialisedPlainFloatStream.java new file mode 100644 index 0000000..9ccee53 --- /dev/null +++ b/java-stream/src/main/java/javastream/jdk/SpecialisedPlainFloatStream.java @@ -0,0 +1,84 @@ +package javastream.jdk; + +import java.util.Collections; +import java.util.List; +import javastream.JavaStream; +import javastream.Main.Config; + +final class SpecialisedPlainFloatStream extends JavaStream { + + private final float[] a; + private final float[] b; + private final float[] c; + + SpecialisedPlainFloatStream(Config config) { + super(config); + this.a = new float[config.options.arraysize]; + this.b = new float[config.options.arraysize]; + this.c = new float[config.options.arraysize]; + } + + @Override + public List listDevices() { + return Collections.singletonList("JVM"); + } + + @Override + public void initArrays() { + for (int i = 0; i < config.options.arraysize; i++) { + a[i] = config.initA; + b[i] = config.initB; + c[i] = config.initC; + } + } + + @SuppressWarnings("ManualArrayCopy") + @Override + public void copy() { + for (int i = 0; i < config.options.arraysize; i++) { + c[i] = a[i]; + } + } + + @Override + public void mul() { + for (int i = 0; i < config.options.arraysize; i++) { + b[i] = config.scalar * c[i]; + } + } + + @Override + public void add() { + for (int i = 0; i < config.options.arraysize; i++) { + c[i] = a[i] + b[i]; + } + } + + @Override + public void triad() { + for (int i = 0; i < config.options.arraysize; i++) { + a[i] = b[i] + config.scalar * c[i]; + } + } + + @Override + public void nstream() { + for (int i = 0; i < config.options.arraysize; i++) { + a[i] += b[i] + config.scalar * c[i]; + } + } + + @Override + public Float dot() { + float acc = 0f; + for (int i = 0; i < config.options.arraysize; i++) { + acc += a[i] * b[i]; + } + return acc; + } + + @Override + public Data data() { + return new Data<>(boxed(a), boxed(b), boxed(c)); + } +} diff --git a/java-stream/src/main/java/javastream/tornadovm/GenericTornadoVMStream.java b/java-stream/src/main/java/javastream/tornadovm/GenericTornadoVMStream.java new file mode 100644 index 0000000..d936df6 --- /dev/null +++ b/java-stream/src/main/java/javastream/tornadovm/GenericTornadoVMStream.java @@ -0,0 +1,98 @@ +package javastream.tornadovm; + +import java.util.List; +import java.util.stream.Collectors; +import javastream.JavaStream; +import javastream.Main.Config; +import uk.ac.manchester.tornado.api.TaskSchedule; +import uk.ac.manchester.tornado.api.TornadoRuntimeCI; +import uk.ac.manchester.tornado.api.common.TornadoDevice; +import uk.ac.manchester.tornado.api.runtime.TornadoRuntime; + +abstract class GenericTornadoVMStream extends JavaStream { + + protected final TornadoDevice device; + + protected TaskSchedule copyTask; + protected TaskSchedule mulTask; + protected TaskSchedule addTask; + protected TaskSchedule triadTask; + protected TaskSchedule nstreamTask; + protected TaskSchedule dotTask; + + GenericTornadoVMStream(Config config) { + super(config); + + try { + TornadoRuntimeCI runtime = TornadoRuntime.getTornadoRuntime(); + List devices = TornadoVMStreams.enumerateDevices(runtime); + device = devices.get(config.options.device); + + if (config.options.isVerboseBenchmark()) { + System.out.println("Using TornadoVM device:"); + System.out.println(" - Name : " + device.getDescription()); + System.out.println(" - Id : " + device.getDeviceName()); + System.out.println(" - Platform : " + device.getPlatformName()); + System.out.println(" - Backend : " + device.getTornadoVMBackend().name()); + } + } catch (Throwable e) { + throw new RuntimeException( + "Unable to initialise TornadoVM, make sure you are running the binary with the `tornado -jar ...` wrapper and not `java -jar ...`", + e); + } + } + + protected static TaskSchedule mkSchedule() { + return new TaskSchedule(""); + } + + @Override + public List listDevices() { + return TornadoVMStreams.enumerateDevices(TornadoRuntime.getTornadoRuntime()).stream() + .map(d -> d.getDescription() + "(" + d.getDeviceName() + ")") + .collect(Collectors.toList()); + } + + @Override + public void initArrays() { + this.copyTask.warmup(); + this.mulTask.warmup(); + this.addTask.warmup(); + this.triadTask.warmup(); + this.nstreamTask.warmup(); + this.dotTask.warmup(); + } + + @Override + public void copy() { + this.copyTask.execute(); + } + + @Override + public void mul() { + this.mulTask.execute(); + } + + @Override + public void add() { + this.addTask.execute(); + } + + @Override + public void triad() { + this.triadTask.execute(); + } + + @Override + public void nstream() { + this.nstreamTask.execute(); + } + + protected abstract T getSum(); + + @Override + public T dot() { + this.dotTask.execute(); + return getSum(); + } +} diff --git a/java-stream/src/main/java/javastream/tornadovm/SpecialisedDouble.java b/java-stream/src/main/java/javastream/tornadovm/SpecialisedDouble.java new file mode 100644 index 0000000..7712e31 --- /dev/null +++ b/java-stream/src/main/java/javastream/tornadovm/SpecialisedDouble.java @@ -0,0 +1,88 @@ +package javastream.tornadovm; + +import java.util.Arrays; +import javastream.Main.Config; +import uk.ac.manchester.tornado.api.annotations.Parallel; +import uk.ac.manchester.tornado.api.annotations.Reduce; + +final class SpecialisedDouble extends GenericTornadoVMStream { + + @SuppressWarnings("ManualArrayCopy") + private static void copy(int size, double[] a, double[] c) { + for (@Parallel int i = 0; i < size; i++) { + c[i] = a[i]; + } + } + + private static void mul(int size, double[] b, double[] c, double scalar) { + for (@Parallel int i = 0; i < size; i++) { + b[i] = scalar * c[i]; + } + } + + private static void add(int size, double[] a, double[] b, double[] c) { + for (@Parallel int i = 0; i < size; i++) { + c[i] = a[i] + b[i]; + } + } + + private static void triad(int size, double[] a, double[] b, double[] c, double scalar) { + for (@Parallel int i = 0; i < size; i++) { + a[i] = b[i] + scalar * c[i]; + } + } + + private static void nstream(int size, double[] a, double[] b, double[] c, double scalar) { + for (@Parallel int i = 0; i < size; i++) { + a[i] = b[i] * scalar * c[i]; + } + } + + private static void dot_( + double[] a, double[] b, @Reduce double[] acc) { // prevent name clash with CL's dot + acc[0] = 0; + for (@Parallel int i = 0; i < a.length; i++) { + acc[0] += a[i] * b[i]; + } + } + + private final double[] a, b, c; + private final double[] dotSum; + + @SuppressWarnings({"PrimitiveArrayArgumentToVarargsMethod", "DuplicatedCode"}) + SpecialisedDouble(Config config) { + super(config); + final int size = config.options.arraysize; + final double scalar = config.scalar; + a = new double[size]; + b = new double[size]; + c = new double[size]; + dotSum = new double[1]; + this.copyTask = mkSchedule().task("", SpecialisedDouble::copy, size, a, c); + this.mulTask = mkSchedule().task("", SpecialisedDouble::mul, size, b, c, scalar); + this.addTask = mkSchedule().task("", SpecialisedDouble::add, size, a, b, c); + this.triadTask = mkSchedule().task("", SpecialisedDouble::triad, size, a, b, c, scalar); + this.nstreamTask = mkSchedule().task("", SpecialisedDouble::nstream, size, a, b, c, scalar); + this.dotTask = mkSchedule().task("", SpecialisedDouble::dot_, a, b, dotSum).streamOut(dotSum); + } + + @Override + public void initArrays() { + super.initArrays(); + Arrays.fill(a, config.initA); + Arrays.fill(b, config.initB); + Arrays.fill(c, config.initC); + TornadoVMStreams.xferToDevice(device, a, b, c); + } + + @Override + protected Double getSum() { + return dotSum[0]; + } + + @Override + public Data data() { + TornadoVMStreams.xferFromDevice(device, a, b, c); + return new Data<>(boxed(a), boxed(b), boxed(c)); + } +} diff --git a/java-stream/src/main/java/javastream/tornadovm/SpecialisedFloat.java b/java-stream/src/main/java/javastream/tornadovm/SpecialisedFloat.java new file mode 100644 index 0000000..e61cfe9 --- /dev/null +++ b/java-stream/src/main/java/javastream/tornadovm/SpecialisedFloat.java @@ -0,0 +1,88 @@ +package javastream.tornadovm; + +import java.util.Arrays; +import javastream.Main.Config; +import uk.ac.manchester.tornado.api.annotations.Parallel; +import uk.ac.manchester.tornado.api.annotations.Reduce; + +final class SpecialisedFloat extends GenericTornadoVMStream { + + @SuppressWarnings("ManualArrayCopy") + private static void copy(int size, float[] a, float[] c) { + for (@Parallel int i = 0; i < size; i++) { + c[i] = a[i]; + } + } + + private static void mul(int size, float[] b, float[] c, float scalar) { + for (@Parallel int i = 0; i < size; i++) { + b[i] = scalar * c[i]; + } + } + + private static void add(int size, float[] a, float[] b, float[] c) { + for (@Parallel int i = 0; i < size; i++) { + c[i] = a[i] + b[i]; + } + } + + private static void triad(int size, float[] a, float[] b, float[] c, float scalar) { + for (@Parallel int i = 0; i < size; i++) { + a[i] = b[i] + scalar * c[i]; + } + } + + private static void nstream(int size, float[] a, float[] b, float[] c, float scalar) { + for (@Parallel int i = 0; i < size; i++) { + a[i] = b[i] * scalar * c[i]; + } + } + + private static void dot_( + float[] a, float[] b, @Reduce float[] acc) { // prevent name clash with CL's dot + acc[0] = 0; + for (@Parallel int i = 0; i < a.length; i++) { + acc[0] += a[i] * b[i]; + } + } + + private final float[] a, b, c; + private final float[] dotSum; + + @SuppressWarnings({"PrimitiveArrayArgumentToVarargsMethod", "DuplicatedCode"}) + SpecialisedFloat(Config config) { + super(config); + final int size = config.options.arraysize; + final float scalar = config.scalar; + a = new float[size]; + b = new float[size]; + c = new float[size]; + dotSum = new float[1]; + this.copyTask = mkSchedule().task("", SpecialisedFloat::copy, size, a, c); + this.mulTask = mkSchedule().task("", SpecialisedFloat::mul, size, b, c, scalar); + this.addTask = mkSchedule().task("", SpecialisedFloat::add, size, a, b, c); + this.triadTask = mkSchedule().task("", SpecialisedFloat::triad, size, a, b, c, scalar); + this.nstreamTask = mkSchedule().task("", SpecialisedFloat::nstream, size, a, b, c, scalar); + this.dotTask = mkSchedule().task("", SpecialisedFloat::dot_, a, b, dotSum).streamOut(dotSum); + } + + @Override + public void initArrays() { + super.initArrays(); + Arrays.fill(a, config.initA); + Arrays.fill(b, config.initB); + Arrays.fill(c, config.initC); + TornadoVMStreams.xferToDevice(device, a, b, c); + } + + @Override + protected Float getSum() { + return dotSum[0]; + } + + @Override + public Data data() { + TornadoVMStreams.xferFromDevice(device, a, b, c); + return new Data<>(boxed(a), boxed(b), boxed(c)); + } +} diff --git a/java-stream/src/main/java/javastream/tornadovm/TornadoVMStreams.java b/java-stream/src/main/java/javastream/tornadovm/TornadoVMStreams.java new file mode 100644 index 0000000..68eecad --- /dev/null +++ b/java-stream/src/main/java/javastream/tornadovm/TornadoVMStreams.java @@ -0,0 +1,42 @@ +package javastream.tornadovm; + +import java.util.List; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import javastream.JavaStream; +import javastream.Main.Config; +import uk.ac.manchester.tornado.api.TornadoRuntimeCI; +import uk.ac.manchester.tornado.api.common.TornadoDevice; +import uk.ac.manchester.tornado.api.mm.TornadoGlobalObjectState; +import uk.ac.manchester.tornado.api.runtime.TornadoRuntime; + +public final class TornadoVMStreams { + + private TornadoVMStreams() {} + + static void xferToDevice(TornadoDevice device, Object... xs) { + for (Object x : xs) { + TornadoGlobalObjectState state = TornadoRuntime.getTornadoRuntime().resolveObject(x); + List writeEvent = device.ensurePresent(x, state.getDeviceState(device), null, 0, 0); + if (writeEvent != null) writeEvent.forEach(e -> device.resolveEvent(e).waitOn()); + } + } + + static void xferFromDevice(TornadoDevice device, Object... xs) { + for (Object x : xs) { + TornadoGlobalObjectState state = TornadoRuntime.getTornadoRuntime().resolveObject(x); + device.resolveEvent(device.streamOut(x, 0, state.getDeviceState(device), null)).waitOn(); + } + } + + static List enumerateDevices(TornadoRuntimeCI runtime) { + return IntStream.range(0, runtime.getNumDrivers()) + .mapToObj(runtime::getDriver) + .flatMap(d -> IntStream.range(0, d.getDeviceCount()).mapToObj(d::getDevice)) + .collect(Collectors.toList()); + } + + public static final Function, JavaStream> FLOAT = SpecialisedFloat::new; + public static final Function, JavaStream> DOUBLE = SpecialisedDouble::new; +} diff --git a/java-stream/src/test/java/javastream/SmokeTest.java b/java-stream/src/test/java/javastream/SmokeTest.java new file mode 100644 index 0000000..2ceca44 --- /dev/null +++ b/java-stream/src/test/java/javastream/SmokeTest.java @@ -0,0 +1,93 @@ +package javastream; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +public class SmokeTest { + + // taken from https://stackoverflow.com/a/32146095/896997 + private static Stream> ofCombinations( + List> collections, List current) { + return collections.isEmpty() + ? Stream.of(current) + : collections.get(0).stream() + .flatMap( + e -> { + List list = new ArrayList<>(current); + list.add(e); + return ofCombinations(collections.subList(1, collections.size()), list); + }); + } + + @SuppressWarnings("unused") + private static Stream options() { + + LinkedHashMap> impls = new LinkedHashMap<>(); + impls.put("jdk-stream", Arrays.asList(0, 1)); + impls.put("jdk-plain", Arrays.asList(0, 1)); + // skip aparapi as none of the jdk fallbacks work correctly + // skip tornadovm as it has no jdk fallback + + List configs = + impls.entrySet().stream() + .flatMap( + e -> + Stream.concat(Stream.of(""), e.getValue().stream().map(i -> "--device " + i)) + .map(d -> "--impl " + e.getKey() + " " + d)) + .collect(Collectors.toList()); + + return ofCombinations( + new ArrayList<>( + Arrays.asList( + configs, + Arrays.asList("", "--csv"), + // XXX floats usually have a 1.0^-5 error which misses 10^-8 + Arrays.asList("", "--float --dot-tolerance 1.0e-5"), + Arrays.asList("", "--triad-only", "--nstream-only"), + Arrays.asList("", "--mibibytes"))), + Collections.emptyList()) + .map( + xs -> + Arguments.of( + xs.stream() // + .map(String::trim) // + .collect(Collectors.joining(" ")) + .trim())); + } + + @ParameterizedTest + @MethodSource("options") + void testIt(String args) { + String line = "--arraysize 2048 " + args; + + // redirect stdout/stderr and only print if anything fails + ByteArrayOutputStream outContent = new ByteArrayOutputStream(); + ByteArrayOutputStream errContent = new ByteArrayOutputStream(); + PrintStream originalOut = System.out; + PrintStream originalErr = System.err; + + System.setOut(new PrintStream(outContent)); + System.setErr(new PrintStream(errContent)); + int run = Main.run(line.split("\\s+")); + System.setOut(originalOut); + System.setErr(originalErr); + + if (run != 0) { + System.out.println(outContent); + System.err.println(errContent); + Assertions.assertEquals(0, run, "`" + line + "` did not return 0"); + } + } +}