diff --git a/CHANGELOG.md b/CHANGELOG.md
index 605d327..deba984 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ All notable changes to this project will be documented in this file.
- Thrust managed memory.
- HIP managed memory.
- New implementation using SYCL2020 USM (sycl2020-acc) and renamed original `sycl2020` to `sycl2020-acc`.
+- Data initialisation and read-back timing for all models, including Java, Scala, Julia, and Rust.
+- Support for the latest Aparapi (3.0.0) and TornadoVM (0.15.x) in the Java implementation.
### Changed
- RAJA CUDA CMake build issues resolved.
@@ -17,6 +19,7 @@ All notable changes to this project will be documented in this file.
- Number of thread-blocks in CUDA dot kernel implementation changed to 1024.
- Fix compatibility of `sycl2020` (now `sycl2020-acc`) with hipSYCL.
- Bumped Julia compat to 1.9
+- Bumped Scala to 3.3.1
- Bumped Rust to 1.74.0-nightly (13e6f24b9 2023-09-23)
diff --git a/src/java/java-stream/pom.xml b/src/java/java-stream/pom.xml
index d28a3d5..78d26b3 100644
--- a/src/java/java-stream/pom.xml
+++ b/src/java/java-stream/pom.xml
@@ -12,7 +12,7 @@
UTF-8
UTF-8
- 5.7.2
+ 5.9.2
@@ -27,19 +27,19 @@
<groupId>com.beust</groupId>
<artifactId>jcommander</artifactId>
- <version>1.81</version>
+ <version>1.82</version>
<groupId>tornado</groupId>
<artifactId>tornado-api</artifactId>
- <version>0.9</version>
+ <version>0.15.1</version>
<groupId>com.aparapi</groupId>
<artifactId>aparapi</artifactId>
- <version>2.0.0</version>
+ <version>3.0.0</version>
diff --git a/src/java/java-stream/src/main/java/javastream/JavaStream.java b/src/java/java-stream/src/main/java/javastream/JavaStream.java
index 7ab96cb..4fdb229 100644
--- a/src/java/java-stream/src/main/java/javastream/JavaStream.java
+++ b/src/java/java-stream/src/main/java/javastream/JavaStream.java
@@ -56,7 +56,7 @@ public abstract class JavaStream {
protected abstract T dot();
- protected abstract Data<T> data();
+ protected abstract Data<T> readArrays();
public static class EnumeratedStream<T> extends JavaStream<T> {
@@ -113,8 +113,8 @@ public abstract class JavaStream {
}
@Override
- public Data<T> data() {
- return actual.data();
+ public Data<T> readArrays() {
+ return actual.readArrays();
}
}
@@ -140,6 +140,14 @@ public abstract class JavaStream {
return Duration.ofNanos(end - start);
}
+ final Duration runInitArrays() {
+ return timed(this::initArrays);
+ }
+
+ final SimpleImmutableEntry<Duration, Data<T>> runReadArrays() {
+ return timed(this::readArrays);
+ }
+
final SimpleImmutableEntry<Timings<List<Duration>>, T> runAll(int times) {
Timings<List<Duration>> timings = new Timings<>();
T lastSum = null;
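The new runInitArrays()/runReadArrays() wrappers reuse the existing timed(...) helpers whose tail is visible just above. Their exact shape is outside this hunk; a minimal sketch of what they are assumed to look like (overload names and signatures are assumptions, not part of the patch):

    // Sketch only: assumed timed(...) overloads backing runInitArrays()/runReadArrays().
    import java.time.Duration;
    import java.util.AbstractMap.SimpleImmutableEntry;
    import java.util.function.Supplier;

    final class TimedSketch {
      // Time a void action (init): return only the elapsed wall-clock Duration.
      static Duration timed(Runnable action) {
        long start = System.nanoTime();
        action.run();
        return Duration.ofNanos(System.nanoTime() - start);
      }

      // Time a producing action (read-back): return the elapsed time together with the result.
      static <R> SimpleImmutableEntry<Duration, R> timed(Supplier<R> action) {
        long start = System.nanoTime();
        R result = action.get();
        return new SimpleImmutableEntry<>(Duration.ofNanos(System.nanoTime() - start), result);
      }
    }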
diff --git a/src/java/java-stream/src/main/java/javastream/Main.java b/src/java/java-stream/src/main/java/javastream/Main.java
index 2442128..3732a24 100644
--- a/src/java/java-stream/src/main/java/javastream/Main.java
+++ b/src/java/java-stream/src/main/java/javastream/Main.java
@@ -128,6 +128,40 @@ public class Main {
}
}
+ @SuppressWarnings("unchecked")
+ static void showInit(
+ int totalBytes, double megaScale, Options opt, Duration init, Duration read) {
+ List<Entry<String, Double>> setup =
+ Arrays.asList(
+ new SimpleImmutableEntry<>("Init", durationToSeconds(init)),
+ new SimpleImmutableEntry<>("Read", durationToSeconds(read)));
+ if (opt.csv) {
+ tabulateCsv(
+ true,
+ setup.stream()
+ .map(
+ x ->
+ Arrays.asList(
+ new SimpleImmutableEntry<>("function", x.getKey()),
+ new SimpleImmutableEntry<>("n_elements", opt.arraysize + ""),
+ new SimpleImmutableEntry<>("sizeof", totalBytes + ""),
+ new SimpleImmutableEntry<>(
+ "max_m" + (opt.mibibytes ? "i" : "") + "bytes_per_sec",
+ ((megaScale * (double) totalBytes / x.getValue())) + ""),
+ new SimpleImmutableEntry<>("runtime", x.getValue() + "")))
+ .toArray(List[]::new));
+ } else {
+ for (Entry<String, Double> e : setup) {
+ System.out.printf(
+ "%s: %.5f s (%.5f M%sBytes/sec)%n",
+ e.getKey(),
+ e.getValue(),
+ megaScale * (double) totalBytes / e.getValue(),
+ opt.mibibytes ? "i" : "");
+ }
+ }
+ }
+
static <T> boolean run(
String name, Config<T> config, Function<Config<T>, JavaStream<T>> mkStream) {
@@ -183,35 +217,46 @@ public class Main {
JavaStream<T> stream = mkStream.apply(config);
- stream.initArrays();
-
+ Duration init = stream.runInitArrays();
final boolean ok;
switch (config.benchmark) {
case ALL:
- Entry<Timings<List<Duration>>, T> results = stream.runAll(opt.numtimes);
- ok = checkSolutions(stream.data(), config, Optional.of(results.getValue()));
- Timings<List<Duration>> timings = results.getKey();
- tabulateCsv(
- opt.csv,
- mkCsvRow(timings.copy, "Copy", 2 * arrayBytes, megaScale, opt),
- mkCsvRow(timings.mul, "Mul", 2 * arrayBytes, megaScale, opt),
- mkCsvRow(timings.add, "Add", 3 * arrayBytes, megaScale, opt),
- mkCsvRow(timings.triad, "Triad", 3 * arrayBytes, megaScale, opt),
- mkCsvRow(timings.dot, "Dot", 2 * arrayBytes, megaScale, opt));
- break;
+ {
+ Entry<Timings<List<Duration>>, T> results = stream.runAll(opt.numtimes);
+ SimpleImmutableEntry<Duration, Data<T>> read = stream.runReadArrays();
+ showInit(totalBytes, megaScale, opt, init, read.getKey());
+ ok = checkSolutions(read.getValue(), config, Optional.of(results.getValue()));
+ Timings<List<Duration>> timings = results.getKey();
+ tabulateCsv(
+ opt.csv,
+ mkCsvRow(timings.copy, "Copy", 2 * arrayBytes, megaScale, opt),
+ mkCsvRow(timings.mul, "Mul", 2 * arrayBytes, megaScale, opt),
+ mkCsvRow(timings.add, "Add", 3 * arrayBytes, megaScale, opt),
+ mkCsvRow(timings.triad, "Triad", 3 * arrayBytes, megaScale, opt),
+ mkCsvRow(timings.dot, "Dot", 2 * arrayBytes, megaScale, opt));
+ break;
+ }
case NSTREAM:
- List<Duration> nstreamResults = stream.runNStream(opt.numtimes);
- ok = checkSolutions(stream.data(), config, Optional.empty());
- tabulateCsv(opt.csv, mkCsvRow(nstreamResults, "Nstream", 4 * arrayBytes, megaScale, opt));
- break;
+ {
+ List<Duration> nstreamResults = stream.runNStream(opt.numtimes);
+ SimpleImmutableEntry<Duration, Data<T>> read = stream.runReadArrays();
+ showInit(totalBytes, megaScale, opt, init, read.getKey());
+ ok = checkSolutions(read.getValue(), config, Optional.empty());
+ tabulateCsv(opt.csv, mkCsvRow(nstreamResults, "Nstream", 4 * arrayBytes, megaScale, opt));
+ break;
+ }
case TRIAD:
- Duration triadResult = stream.runTriad(opt.numtimes);
- ok = checkSolutions(stream.data(), config, Optional.empty());
- int triadTotalBytes = 3 * arrayBytes * opt.numtimes;
- double bandwidth = megaScale * (triadTotalBytes / durationToSeconds(triadResult));
- System.out.printf("Runtime (seconds): %.5f", durationToSeconds(triadResult));
- System.out.printf("Bandwidth (%s/s): %.3f ", gigaSuffix, bandwidth);
- break;
+ {
+ Duration triadResult = stream.runTriad(opt.numtimes);
+ SimpleImmutableEntry<Duration, Data<T>> read = stream.runReadArrays();
+ showInit(totalBytes, megaScale, opt, init, read.getKey());
+ ok = checkSolutions(read.getValue(), config, Optional.empty());
+ int triadTotalBytes = 3 * arrayBytes * opt.numtimes;
+ double bandwidth = megaScale * (triadTotalBytes / durationToSeconds(triadResult));
+ System.out.printf("Runtime (seconds): %.5f", durationToSeconds(triadResult));
+ System.out.printf("Bandwidth (%s/s): %.3f ", gigaSuffix, bandwidth);
+ break;
+ }
default:
throw new AssertionError();
}
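showInit divides the byte count by durationToSeconds(...), a helper that already exists in Main.java but is outside this hunk; it is assumed to behave roughly as follows (sketch only, the real implementation may differ):

    // Assumed behaviour of the existing durationToSeconds helper used by showInit.
    static double durationToSeconds(java.time.Duration d) {
      return d.toNanos() * 1e-9; // nanoseconds to fractional seconds
    }

With --csv, the Init and Read rows then go through the same tabulateCsv path as the kernel rows, carrying function, n_elements, sizeof, max_m(i)bytes_per_sec and runtime columns.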
diff --git a/src/java/java-stream/src/main/java/javastream/aparapi/AparapiStreams.java b/src/java/java-stream/src/main/java/javastream/aparapi/AparapiStreams.java
index ab2de52..052c807 100644
--- a/src/java/java-stream/src/main/java/javastream/aparapi/AparapiStreams.java
+++ b/src/java/java-stream/src/main/java/javastream/aparapi/AparapiStreams.java
@@ -122,7 +122,7 @@ public final class AparapiStreams {
}
@Override
- public Data data() {
+ public Data readArrays() {
return kernels.syncAndDispose();
}
}
diff --git a/src/java/java-stream/src/main/java/javastream/jdk/GenericPlainStream.java b/src/java/java-stream/src/main/java/javastream/jdk/GenericPlainStream.java
index 7f210fa..8075603 100644
--- a/src/java/java-stream/src/main/java/javastream/jdk/GenericPlainStream.java
+++ b/src/java/java-stream/src/main/java/javastream/jdk/GenericPlainStream.java
@@ -86,7 +86,7 @@ final class GenericPlainStream extends JavaStream {
}
@Override
- public Data<T> data() {
+ public Data<T> readArrays() {
return new Data<>(a, b, c);
}
}
diff --git a/src/java/java-stream/src/main/java/javastream/jdk/GenericStream.java b/src/java/java-stream/src/main/java/javastream/jdk/GenericStream.java
index 1e65b8f..3cacf3a 100644
--- a/src/java/java-stream/src/main/java/javastream/jdk/GenericStream.java
+++ b/src/java/java-stream/src/main/java/javastream/jdk/GenericStream.java
@@ -80,7 +80,7 @@ final class GenericStream extends JavaStream {
}
@Override
- public Data<T> data() {
+ public Data<T> readArrays() {
return new Data<>(a, b, c);
}
}
diff --git a/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedDoubleStream.java b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedDoubleStream.java
index 26406a6..1b54bc3 100644
--- a/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedDoubleStream.java
+++ b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedDoubleStream.java
@@ -78,7 +78,7 @@ final class SpecialisedDoubleStream extends JavaStream {
}
@Override
- public Data<Double> data() {
+ public Data<Double> readArrays() {
return new Data<>(boxed(a), boxed(b), boxed(c));
}
}
diff --git a/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedFloatStream.java b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedFloatStream.java
index 6c414c1..4d8c137 100644
--- a/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedFloatStream.java
+++ b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedFloatStream.java
@@ -78,7 +78,7 @@ final class SpecialisedFloatStream extends JavaStream {
}
@Override
- public Data<Float> data() {
+ public Data<Float> readArrays() {
return new Data<>(boxed(a), boxed(b), boxed(c));
}
}
diff --git a/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainDoubleStream.java b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainDoubleStream.java
index afda2ef..c4f38d0 100644
--- a/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainDoubleStream.java
+++ b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainDoubleStream.java
@@ -78,7 +78,7 @@ final class SpecialisedPlainDoubleStream extends JavaStream {
}
@Override
- public Data<Double> data() {
+ public Data<Double> readArrays() {
return new Data<>(boxed(a), boxed(b), boxed(c));
}
}
diff --git a/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainFloatStream.java b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainFloatStream.java
index 9ccee53..5178ed2 100644
--- a/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainFloatStream.java
+++ b/src/java/java-stream/src/main/java/javastream/jdk/SpecialisedPlainFloatStream.java
@@ -78,7 +78,7 @@ final class SpecialisedPlainFloatStream extends JavaStream {
}
@Override
- public Data<Float> data() {
+ public Data<Float> readArrays() {
return new Data<>(boxed(a), boxed(b), boxed(c));
}
}
diff --git a/src/java/java-stream/src/main/java/javastream/tornadovm/GenericTornadoVMStream.java b/src/java/java-stream/src/main/java/javastream/tornadovm/GenericTornadoVMStream.java
index d936df6..a65c32a 100644
--- a/src/java/java-stream/src/main/java/javastream/tornadovm/GenericTornadoVMStream.java
+++ b/src/java/java-stream/src/main/java/javastream/tornadovm/GenericTornadoVMStream.java
@@ -4,8 +4,8 @@ import java.util.List;
import java.util.stream.Collectors;
import javastream.JavaStream;
import javastream.Main.Config;
-import uk.ac.manchester.tornado.api.TaskSchedule;
-import uk.ac.manchester.tornado.api.TornadoRuntimeCI;
+import uk.ac.manchester.tornado.api.TornadoExecutionPlan;
+import uk.ac.manchester.tornado.api.TornadoRuntimeInterface;
import uk.ac.manchester.tornado.api.common.TornadoDevice;
import uk.ac.manchester.tornado.api.runtime.TornadoRuntime;
@@ -13,18 +13,18 @@ abstract class GenericTornadoVMStream extends JavaStream {
protected final TornadoDevice device;
- protected TaskSchedule copyTask;
- protected TaskSchedule mulTask;
- protected TaskSchedule addTask;
- protected TaskSchedule triadTask;
- protected TaskSchedule nstreamTask;
- protected TaskSchedule dotTask;
+ protected TornadoExecutionPlan copyTask;
+ protected TornadoExecutionPlan mulTask;
+ protected TornadoExecutionPlan addTask;
+ protected TornadoExecutionPlan triadTask;
+ protected TornadoExecutionPlan nstreamTask;
+ protected TornadoExecutionPlan dotTask;
GenericTornadoVMStream(Config<T> config) {
super(config);
try {
- TornadoRuntimeCI runtime = TornadoRuntime.getTornadoRuntime();
+ TornadoRuntimeInterface runtime = TornadoRuntime.getTornadoRuntime();
List<TornadoDevice> devices = TornadoVMStreams.enumerateDevices(runtime);
device = devices.get(config.options.device);
@@ -42,10 +42,6 @@ abstract class GenericTornadoVMStream extends JavaStream {
}
}
- protected static TaskSchedule mkSchedule() {
- return new TaskSchedule("");
- }
-
@Override
public List<String> listDevices() {
return TornadoVMStreams.enumerateDevices(TornadoRuntime.getTornadoRuntime()).stream()
@@ -55,12 +51,12 @@ abstract class GenericTornadoVMStream extends JavaStream {
@Override
public void initArrays() {
- this.copyTask.warmup();
- this.mulTask.warmup();
- this.addTask.warmup();
- this.triadTask.warmup();
- this.nstreamTask.warmup();
- this.dotTask.warmup();
+ this.copyTask.withWarmUp();
+ this.mulTask.withWarmUp();
+ this.addTask.withWarmUp();
+ this.triadTask.withWarmUp();
+ this.nstreamTask.withWarmUp();
+ this.dotTask.withWarmUp();
}
@Override
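With TaskSchedule removed, warm-up moves to TornadoExecutionPlan.withWarmUp(). The kernel methods themselves (copy(), mul(), add(), ...) are outside this hunk; they are assumed to simply execute the corresponding plan, roughly as below (hypothetical sketch, not part of the patch):

    // Assumed shape of a kernel method after the migration: each call runs the
    // snapshotted TaskGraph through its TornadoExecutionPlan.
    @Override
    public void copy() {
      copyTask.execute();
    }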
diff --git a/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedDouble.java b/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedDouble.java
index 7712e31..c10153e 100644
--- a/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedDouble.java
+++ b/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedDouble.java
@@ -2,8 +2,11 @@ package javastream.tornadovm;
import java.util.Arrays;
import javastream.Main.Config;
+import uk.ac.manchester.tornado.api.TaskGraph;
+import uk.ac.manchester.tornado.api.TornadoExecutionPlan;
import uk.ac.manchester.tornado.api.annotations.Parallel;
import uk.ac.manchester.tornado.api.annotations.Reduce;
+import uk.ac.manchester.tornado.api.enums.DataTransferMode;
final class SpecialisedDouble extends GenericTornadoVMStream {
@@ -49,7 +52,7 @@ final class SpecialisedDouble extends GenericTornadoVMStream {
private final double[] a, b, c;
private final double[] dotSum;
- @SuppressWarnings({"PrimitiveArrayArgumentToVarargsMethod", "DuplicatedCode"})
+ @SuppressWarnings({"DuplicatedCode"})
SpecialisedDouble(Config<Double> config) {
super(config);
final int size = config.options.arraysize;
@@ -58,12 +61,43 @@ final class SpecialisedDouble extends GenericTornadoVMStream {
b = new double[size];
c = new double[size];
dotSum = new double[1];
- this.copyTask = mkSchedule().task("", SpecialisedDouble::copy, size, a, c);
- this.mulTask = mkSchedule().task("", SpecialisedDouble::mul, size, b, c, scalar);
- this.addTask = mkSchedule().task("", SpecialisedDouble::add, size, a, b, c);
- this.triadTask = mkSchedule().task("", SpecialisedDouble::triad, size, a, b, c, scalar);
- this.nstreamTask = mkSchedule().task("", SpecialisedDouble::nstream, size, a, b, c, scalar);
- this.dotTask = mkSchedule().task("", SpecialisedDouble::dot_, a, b, dotSum).streamOut(dotSum);
+ this.copyTask =
+ new TornadoExecutionPlan(
+ new TaskGraph("copy")
+ .task("copy", SpecialisedDouble::copy, size, a, c)
+ .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, c)
+ .snapshot());
+ this.mulTask =
+ new TornadoExecutionPlan(
+ new TaskGraph("mul")
+ .task("mul", SpecialisedDouble::mul, size, b, c, scalar)
+ .transferToDevice(DataTransferMode.FIRST_EXECUTION, b, c)
+ .snapshot());
+ this.addTask =
+ new TornadoExecutionPlan(
+ new TaskGraph("add")
+ .task("add", SpecialisedDouble::add, size, a, b, c)
+ .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
+ .snapshot());
+ this.triadTask =
+ new TornadoExecutionPlan(
+ new TaskGraph("triad")
+ .task("triad", SpecialisedDouble::triad, size, a, b, c, scalar)
+ .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
+ .snapshot());
+ this.nstreamTask =
+ new TornadoExecutionPlan(
+ new TaskGraph("nstream")
+ .task("nstream", SpecialisedDouble::nstream, size, a, b, c, scalar)
+ .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
+ .snapshot());
+ this.dotTask =
+ new TornadoExecutionPlan(
+ new TaskGraph("dot")
+ .task("dot", SpecialisedDouble::dot_, a, b, dotSum)
+ .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b)
+ .transferToHost(DataTransferMode.EVERY_EXECUTION, new Object[] {dotSum})
+ .snapshot());
}
@Override
@@ -72,7 +106,7 @@ final class SpecialisedDouble extends GenericTornadoVMStream {
Arrays.fill(a, config.initA);
Arrays.fill(b, config.initB);
Arrays.fill(c, config.initC);
- TornadoVMStreams.xferToDevice(device, a, b, c);
+ TornadoVMStreams.allocAndXferToDevice(device, a, b, c);
}
@Override
@@ -81,7 +115,7 @@ final class SpecialisedDouble extends GenericTornadoVMStream {
}
@Override
- public Data<Double> data() {
+ public Data<Double> readArrays() {
TornadoVMStreams.xferFromDevice(device, a, b, c);
return new Data<>(boxed(a), boxed(b), boxed(c));
}
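Every kernel now follows the same TaskGraph -> snapshot() -> TornadoExecutionPlan construction shown above; only the dot graph additionally transfers the reduction buffer back on every execution. A condensed illustration of the pattern (the helper name planFor is hypothetical and the copy kernel is used as a stand-in):

    // Sketch: build one task graph, freeze it with snapshot(), and wrap it in an execution plan.
    static TornadoExecutionPlan planFor(String name, double[] a, double[] c, int size) {
      TaskGraph graph =
          new TaskGraph(name)
              .task(name, SpecialisedDouble::copy, size, a, c)
              .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, c);
      return new TornadoExecutionPlan(graph.snapshot());
    }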
diff --git a/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedFloat.java b/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedFloat.java
index e61cfe9..0f3fffa 100644
--- a/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedFloat.java
+++ b/src/java/java-stream/src/main/java/javastream/tornadovm/SpecialisedFloat.java
@@ -2,8 +2,11 @@ package javastream.tornadovm;
import java.util.Arrays;
import javastream.Main.Config;
+import uk.ac.manchester.tornado.api.TaskGraph;
+import uk.ac.manchester.tornado.api.TornadoExecutionPlan;
import uk.ac.manchester.tornado.api.annotations.Parallel;
import uk.ac.manchester.tornado.api.annotations.Reduce;
+import uk.ac.manchester.tornado.api.enums.DataTransferMode;
final class SpecialisedFloat extends GenericTornadoVMStream {
@@ -49,7 +52,7 @@ final class SpecialisedFloat extends GenericTornadoVMStream {
private final float[] a, b, c;
private final float[] dotSum;
- @SuppressWarnings({"PrimitiveArrayArgumentToVarargsMethod", "DuplicatedCode"})
+ @SuppressWarnings({"DuplicatedCode"})
SpecialisedFloat(Config<Float> config) {
super(config);
final int size = config.options.arraysize;
@@ -58,12 +61,43 @@ final class SpecialisedFloat extends GenericTornadoVMStream {
b = new float[size];
c = new float[size];
dotSum = new float[1];
- this.copyTask = mkSchedule().task("", SpecialisedFloat::copy, size, a, c);
- this.mulTask = mkSchedule().task("", SpecialisedFloat::mul, size, b, c, scalar);
- this.addTask = mkSchedule().task("", SpecialisedFloat::add, size, a, b, c);
- this.triadTask = mkSchedule().task("", SpecialisedFloat::triad, size, a, b, c, scalar);
- this.nstreamTask = mkSchedule().task("", SpecialisedFloat::nstream, size, a, b, c, scalar);
- this.dotTask = mkSchedule().task("", SpecialisedFloat::dot_, a, b, dotSum).streamOut(dotSum);
+ this.copyTask =
+ new TornadoExecutionPlan(
+ new TaskGraph("copy")
+ .task("copy", SpecialisedFloat::copy, size, a, c)
+ .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, c)
+ .snapshot());
+ this.mulTask =
+ new TornadoExecutionPlan(
+ new TaskGraph("mul")
+ .task("mul", SpecialisedFloat::mul, size, b, c, scalar)
+ .transferToDevice(DataTransferMode.FIRST_EXECUTION, b, c)
+ .snapshot());
+ this.addTask =
+ new TornadoExecutionPlan(
+ new TaskGraph("add")
+ .task("add", SpecialisedFloat::add, size, a, b, c)
+ .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
+ .snapshot());
+ this.triadTask =
+ new TornadoExecutionPlan(
+ new TaskGraph("triad")
+ .task("triad", SpecialisedFloat::triad, size, a, b, c, scalar)
+ .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
+ .snapshot());
+ this.nstreamTask =
+ new TornadoExecutionPlan(
+ new TaskGraph("nstream")
+ .task("nstream", SpecialisedFloat::nstream, size, a, b, c, scalar)
+ .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
+ .snapshot());
+ this.dotTask =
+ new TornadoExecutionPlan(
+ new TaskGraph("dot")
+ .task("dot", SpecialisedFloat::dot_, a, b, dotSum)
+ .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b)
+ .transferToHost(DataTransferMode.EVERY_EXECUTION, new Object[] {dotSum})
+ .snapshot());
}
@Override
@@ -72,7 +106,7 @@ final class SpecialisedFloat extends GenericTornadoVMStream {
Arrays.fill(a, config.initA);
Arrays.fill(b, config.initB);
Arrays.fill(c, config.initC);
- TornadoVMStreams.xferToDevice(device, a, b, c);
+ TornadoVMStreams.allocAndXferToDevice(device, a, b, c);
}
@Override
@@ -81,7 +115,7 @@ final class SpecialisedFloat extends GenericTornadoVMStream {
}
@Override
- public Data<Float> data() {
+ public Data<Float> readArrays() {
TornadoVMStreams.xferFromDevice(device, a, b, c);
return new Data<>(boxed(a), boxed(b), boxed(c));
}
diff --git a/src/java/java-stream/src/main/java/javastream/tornadovm/TornadoVMStreams.java b/src/java/java-stream/src/main/java/javastream/tornadovm/TornadoVMStreams.java
index 68eecad..a43c7c8 100644
--- a/src/java/java-stream/src/main/java/javastream/tornadovm/TornadoVMStreams.java
+++ b/src/java/java-stream/src/main/java/javastream/tornadovm/TornadoVMStreams.java
@@ -1,36 +1,46 @@
package javastream.tornadovm;
+import java.util.Arrays;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import javastream.JavaStream;
import javastream.Main.Config;
-import uk.ac.manchester.tornado.api.TornadoRuntimeCI;
+import uk.ac.manchester.tornado.api.TornadoRuntimeInterface;
+import uk.ac.manchester.tornado.api.common.Event;
import uk.ac.manchester.tornado.api.common.TornadoDevice;
-import uk.ac.manchester.tornado.api.mm.TornadoGlobalObjectState;
+import uk.ac.manchester.tornado.api.memory.TornadoDeviceObjectState;
+import uk.ac.manchester.tornado.api.memory.TornadoGlobalObjectState;
import uk.ac.manchester.tornado.api.runtime.TornadoRuntime;
public final class TornadoVMStreams {
private TornadoVMStreams() {}
- static void xferToDevice(TornadoDevice device, Object... xs) {
+ static void allocAndXferToDevice(TornadoDevice device, Object... xs) {
for (Object x : xs) {
TornadoGlobalObjectState state = TornadoRuntime.getTornadoRuntime().resolveObject(x);
+ device.allocateObjects(
+ new Object[] {x}, 0, new TornadoDeviceObjectState[] {state.getDeviceState(device)});
List<Integer> writeEvent = device.ensurePresent(x, state.getDeviceState(device), null, 0, 0);
if (writeEvent != null) writeEvent.forEach(e -> device.resolveEvent(e).waitOn());
}
}
static void xferFromDevice(TornadoDevice device, Object... xs) {
- for (Object x : xs) {
- TornadoGlobalObjectState state = TornadoRuntime.getTornadoRuntime().resolveObject(x);
- device.resolveEvent(device.streamOut(x, 0, state.getDeviceState(device), null)).waitOn();
- }
+ Arrays.stream(xs)
+ .map(
+ x -> {
+ TornadoGlobalObjectState state = TornadoRuntime.getTornadoRuntime().resolveObject(x);
+ return device.resolveEvent(
+ device.streamOut(x, 0, state.getDeviceState(device), null));
+ })
+ .collect(Collectors.toList())
+ .forEach(Event::waitOn);
}
- static List<TornadoDevice> enumerateDevices(TornadoRuntimeCI runtime) {
+ static List<TornadoDevice> enumerateDevices(TornadoRuntimeInterface runtime) {
return IntStream.range(0, runtime.getNumDrivers())
.mapToObj(runtime::getDriver)
.flatMap(d -> IntStream.range(0, d.getDeviceCount()).mapToObj(d::getDevice))
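allocAndXferToDevice now allocates the device buffers explicitly before ensurePresent, and xferFromDevice issues every streamOut first and only then waits on the collected events. A minimal usage sketch of the two helpers (device index and array sizes are placeholders):

    // Illustrative only: push host arrays to a device, run the execution plans, then read back.
    TornadoDevice device =
        TornadoVMStreams.enumerateDevices(TornadoRuntime.getTornadoRuntime()).get(0);
    double[] a = new double[1024], b = new double[1024], c = new double[1024];
    TornadoVMStreams.allocAndXferToDevice(device, a, b, c); // host -> device, blocks on write events
    // ... execute the TornadoExecutionPlans here ...
    TornadoVMStreams.xferFromDevice(device, a, b, c);       // device -> host, blocks on all read events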
diff --git a/src/julia/JuliaStream.jl/src/Stream.jl b/src/julia/JuliaStream.jl/src/Stream.jl
index 42030f8..226d44b 100644
--- a/src/julia/JuliaStream.jl/src/Stream.jl
+++ b/src/julia/JuliaStream.jl/src/Stream.jl
@@ -20,6 +20,18 @@ end
@enum Benchmark All Triad Nstream
+
+function run_init_arrays!(data::StreamData{T,C}, context, init::Tuple{T,T,T})::Float64 where {T,C}
+ return @elapsed init_arrays!(data, context, init)
+end
+
+function run_read_data(data::StreamData{T,C}, context)::Tuple{Float64,VectorData{T}} where {T,C}
+ elapsed = @elapsed begin
+ result = read_data(data, context)
+ end
+ return (elapsed, result)
+end
+
function run_all!(data::StreamData{T,C}, context, times::Int)::Tuple{Timings,T} where {T,C}
timings = Timings(times)
lastSum::T = 0
@@ -39,11 +51,7 @@ function run_triad!(data::StreamData{T,C}, context, times::Int)::Float64 where {
end
end
-function run_nstream!(
- data::StreamData{T,C},
- context,
- times::Int,
-)::Vector{Float64} where {T,C}
+function run_nstream!(data::StreamData{T,C}, context, times::Int)::Vector{Float64} where {T,C}
timings::Vector{Float64} = zeros(times)
for i = 1:times
@inbounds timings[i] = @elapsed nstream!(data, context)
@@ -93,9 +101,7 @@ function check_solutions(
error = abs((dot - gold_sum) / gold_sum)
failed = error > 1.0e-8
if failed
- println(
- "Validation failed on sum. Error $error \nSum was $dot but should be $gold_sum",
- )
+ println("Validation failed on sum. Error $error \nSum was $dot but should be $gold_sum")
end
!failed
end : true
@@ -166,7 +172,7 @@ function main()
parse_options(config)
if config.list
- for (i, (_,repr, impl)) in enumerate(devices())
+ for (i, (_, repr, impl)) in enumerate(devices())
println("[$i] ($impl) $repr")
end
exit(0)
@@ -175,9 +181,7 @@ function main()
ds = devices()
# TODO implement substring device match
if config.device < 1 || config.device > length(ds)
- error(
- "Device $(config.device) out of range (1..$(length(ds))), NOTE: Julia is 1-indexed",
- )
+ error("Device $(config.device) out of range (1..$(length(ds))), NOTE: Julia is 1-indexed")
else
device = ds[config.device]
end
@@ -257,16 +261,42 @@ function main()
end
end
+ function show_init(init::Float64, read::Float64)
+ setup = [("Init", init, 3 * array_bytes), ("Read", read, 3 * array_bytes)]
+ if config.csv
+ tabulate(
+ map(
+ x -> [
+ ("phase", x[1]),
+ ("n_elements", config.arraysize),
+ ("sizeof", x[3]),
+ ("max_m$(config.mibibytes ? "i" : "")bytes_per_sec", mega_scale * total_bytes / x[2]),
+ ("runtime", x[2]),
+ ],
+ setup,
+ )...,
+ )
+ else
+ for (name, elapsed, total_bytes) in setup
+ println(
+ "$name: $(round(elapsed; digits=5)) s (=$(round(( mega_scale * total_bytes) / elapsed; digits = 5)) M$(config.mibibytes ? "i" : "")Bytes/sec)",
+ )
+ end
+ end
+ end
+
init::Tuple{type,type,type} = DefaultInit
scalar::type = DefaultScalar
GC.enable(false)
(data, context) = make_stream(config.arraysize, scalar, device, config.csv)
- init_arrays!(data, context, init)
+ tInit = run_init_arrays!(data, context, init)
if benchmark == All
(timings, sum) = run_all!(data, context, config.numtimes)
- valid = check_solutions(read_data(data, context), config.numtimes, init, benchmark, sum)
+ (tRead, result) = run_read_data(data, context)
+ show_init(tInit, tRead)
+ valid = check_solutions(result, config.numtimes, init, benchmark, sum)
tabulate(
mk_row(timings.copy, "Copy", 2 * array_bytes),
mk_row(timings.mul, "Mul", 2 * array_bytes),
@@ -276,13 +306,15 @@ function main()
)
elseif benchmark == Nstream
timings = run_nstream!(data, context, config.numtimes)
- valid =
- check_solutions(read_data(data, context), config.numtimes, init, benchmark, nothing)
+ (tRead, result) = run_read_data(data, context)
+ show_init(tInit, tRead)
+ valid = check_solutions(result, config.numtimes, init, benchmark, nothing)
tabulate(mk_row(timings, "Nstream", 4 * array_bytes))
elseif benchmark == Triad
elapsed = run_triad!(data, context, config.numtimes)
- valid =
- check_solutions(read_data(data, context), config.numtimes, init, benchmark, nothing)
+ (tRead, result) = run_read_data(data, context)
+ show_init(tInit, tRead)
+ valid = check_solutions(result, config.numtimes, init, benchmark, nothing)
total_bytes = 3 * array_bytes * config.numtimes
bandwidth = mega_scale * (total_bytes / elapsed)
println("Runtime (seconds): $(round(elapsed; digits=5))")
@@ -290,7 +322,6 @@ function main()
else
error("Bad benchmark $(benchmark)")
end
-
GC.enable(true)
if !valid
diff --git a/src/main.cpp b/src/main.cpp
index 54a3ed9..639f0c3 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -306,7 +306,9 @@ void run()
#endif
+ auto init1 = std::chrono::high_resolution_clock::now();
stream->init_arrays(startA, startB, startC);
+ auto init2 = std::chrono::high_resolution_clock::now();
// Result of the Dot kernel, if used.
T sum{};
@@ -333,7 +335,54 @@ void run()
std::vector<T> c(ARRAY_SIZE);
+ auto read1 = std::chrono::high_resolution_clock::now();
stream->read_arrays(a, b, c);
+ auto read2 = std::chrono::high_resolution_clock::now();
+
+ auto initElapsedS = std::chrono::duration_cast<std::chrono::duration<double>>(init2 - init1).count();
+ auto readElapsedS = std::chrono::duration_cast<std::chrono::duration<double>>(read2 - read1).count();
+ auto initBWps = ((mibibytes ? std::pow(2.0, -20.0) : 1.0E-6) * (3 * sizeof(T) * ARRAY_SIZE)) / initElapsedS;
+ auto readBWps = ((mibibytes ? std::pow(2.0, -20.0) : 1.0E-6) * (3 * sizeof(T) * ARRAY_SIZE)) / readElapsedS;
+
+ if (output_as_csv)
+ {
+ std::cout
+ << "phase" << csv_separator
+ << "n_elements" << csv_separator
+ << "sizeof" << csv_separator
+ << ((mibibytes) ? "max_mibytes_per_sec" : "max_mbytes_per_sec") << csv_separator
+ << "runtime" << std::endl;
+ std::cout
+ << "Init" << csv_separator
+ << ARRAY_SIZE << csv_separator
+ << sizeof(T) << csv_separator
+ << initBWps << csv_separator
+ << initElapsedS << std::endl;
+ std::cout
+ << "Read" << csv_separator
+ << ARRAY_SIZE << csv_separator
+ << sizeof(T) << csv_separator
+ << readBWps << csv_separator
+ << readElapsedS << std::endl;
+ }
+ else
+ {
+ std::cout << "Init: "
+ << std::setw(7)
+ << initElapsedS
+ << " s (="
+ << initBWps
+ << (mibibytes ? " MiBytes/sec" : " MBytes/sec")
+ << ")" << std::endl;
+ std::cout << "Read: "
+ << std::setw(7)
+ << readElapsedS
+ << " s (="
+ << readBWps
+ << (mibibytes ? " MiBytes/sec" : " MBytes/sec")
+ << ")" << std::endl;
+ }
+
check_solution(num_times, a, b, c, sum);
// Display timing results
diff --git a/src/rust/rust-stream/rustfmt.toml b/src/rust/rust-stream/rustfmt.toml
index aa2f0e9..66b6235 100644
--- a/src/rust/rust-stream/rustfmt.toml
+++ b/src/rust/rust-stream/rustfmt.toml
@@ -54,7 +54,7 @@ use_field_init_shorthand = false
force_explicit_abi = true
condense_wildcard_suffixes = false
color = "Auto"
-required_version = "1.4.38"
+required_version = "1.6.0"
unstable_features = false
disable_all_formatting = false
skip_children = false
diff --git a/src/rust/rust-stream/src/lib.rs b/src/rust/rust-stream/src/lib.rs
index 3ac72c3..41ac0c2 100644
--- a/src/rust/rust-stream/src/lib.rs
+++ b/src/rust/rust-stream/src/lib.rs
@@ -174,7 +174,7 @@ where StreamData: RustStream {
);
}
- stream.init_arrays();
+ let init = stream.run_init_arrays();
let tabulate = |xs: &Vec<Duration>, name: &str, t_size: usize| -> Vec<(&str, String)> {
let tail = &xs[1..]; // tail only
@@ -235,10 +235,47 @@ where StreamData: RustStream {
};
};
+ let show_setup = |init: Duration, read: Duration| {
+ let setup = vec![
+ ("Init", init.as_secs_f64(), 3 * array_bytes),
+ ("Read", read.as_secs_f64(), 3 * array_bytes),
+ ];
+ if option.csv {
+ tabulate_all(
+ setup
+ .iter()
+ .map(|(name, elapsed, t_size)| {
+ vec![
+ ("phase", name.to_string()),
+ ("n_elements", option.arraysize.to_string()),
+ ("sizeof", t_size.to_string()),
+ (
+ if option.mibibytes { "max_mibytes_per_sec" } else { "max_mbytes_per_sec" },
+ (mega_scale * (*t_size as f64) / elapsed).to_string(),
+ ),
+ ("runtime", elapsed.to_string()),
+ ]
+ })
+ .collect::<Vec<_>>(),
+ );
+ } else {
+ for (name, elapsed, t_size) in setup {
+ println!(
+ "{}: {:.5} s (={:.5} {})",
+ name,
+ elapsed,
+ mega_scale * (t_size as f64) / elapsed,
+ if option.mibibytes { "MiBytes/sec" } else { "MBytes/sec" }
+ );
+ }
+ }
+ };
+
let solutions_correct = match benchmark {
Benchmark::All => {
let (results, sum) = stream.run_all(option.numtimes);
- stream.read_arrays();
+ let read = stream.run_read_arrays();
+ show_setup(init, read);
let correct = check_solution(benchmark, option.numtimes, &stream, Some(sum));
tabulate_all(vec![
tabulate(&results.copy, "Copy", 2 * array_bytes),
@@ -251,14 +288,16 @@ where StreamData: RustStream {
}
Benchmark::NStream => {
let results = stream.run_nstream(option.numtimes);
- stream.read_arrays();
+ let read = stream.run_read_arrays();
+ show_setup(init, read);
let correct = check_solution(benchmark, option.numtimes, &stream, None);
tabulate_all(vec![tabulate(&results, "Nstream", 4 * array_bytes)]);
correct
}
Benchmark::Triad => {
let results = stream.run_triad(option.numtimes);
- stream.read_arrays();
+ let read = stream.run_read_arrays();
+ show_setup(init, read);
let correct = check_solution(benchmark, option.numtimes, &stream, None);
let total_bytes = 3 * array_bytes * option.numtimes;
let bandwidth = giga_scale * (total_bytes as f64 / results.as_secs_f64());
diff --git a/src/rust/rust-stream/src/stream.rs b/src/rust/rust-stream/src/stream.rs
index 560c6f1..86de56b 100644
--- a/src/rust/rust-stream/src/stream.rs
+++ b/src/rust/rust-stream/src/stream.rs
@@ -132,6 +132,18 @@ pub trait RustStream {
fn nstream(&mut self);
fn dot(&mut self) -> T;
+ fn run_init_arrays(&mut self) -> Duration {
+ timed(|| {
+ self.init_arrays();
+ })
+ }
+
+ fn run_read_arrays(&mut self) -> Duration {
+ timed(|| {
+ self.read_arrays();
+ })
+ }
+
fn run_all(&mut self, n: usize) -> (AllTiming<Vec<Duration>>, T) {
let mut timings: AllTiming<Vec<Duration>> = AllTiming {
copy: vec![Duration::default(); n],
diff --git a/src/rust/rust-stream/tests/integration_test.rs b/src/rust/rust-stream/tests/integration_test.rs
index 8031a79..0170546 100644
--- a/src/rust/rust-stream/tests/integration_test.rs
+++ b/src/rust/rust-stream/tests/integration_test.rs
@@ -2,10 +2,10 @@ use rstest::rstest;
#[rstest]
fn test_main(
- #[values(0, 1, 2, 3, 4)] device: usize, //
- #[values("", "--pin")] pin: &str, //
- #[values("", "--malloc")] malloc: &str, //
- #[values("", "--init")] init: &str, //
+ #[values(0, 1, 2, 3, 4)] device: usize, //
+ #[values("", "--pin")] pin: &str, //
+ #[values("", "--malloc")] malloc: &str, //
+ #[values("", "--init")] init: &str, //
#[values("", "--triad-only", "--nstream-only")] option: &str, //
) {
let line = format!(
diff --git a/src/scala/scala-stream/.bsp/sbt.json b/src/scala/scala-stream/.bsp/sbt.json
deleted file mode 100644
index 2e1edb1..0000000
--- a/src/scala/scala-stream/.bsp/sbt.json
+++ /dev/null
@@ -1 +0,0 @@
-{"name":"sbt","version":"1.5.2","bspVersion":"2.0.0-M5","languages":["scala"],"argv":["/usr/lib/jvm/java-11-openjdk-11.0.11.0.9-2.fc33.x86_64/bin/java","-Xms100m","-Xmx100m","-classpath","/home/tom/.local/share/JetBrains/Toolbox/apps/IDEA-U/ch-0/211.7142.45.plugins/Scala/launcher/sbt-launch.jar","xsbt.boot.Boot","-bsp","--sbt-launch-jar=/home/tom/.local/share/JetBrains/Toolbox/apps/IDEA-U/ch-0/211.7142.45.plugins/Scala/launcher/sbt-launch.jar"]}
\ No newline at end of file
diff --git a/src/scala/scala-stream/.gitignore b/src/scala/scala-stream/.gitignore
index 2f7896d..ee5cda2 100644
--- a/src/scala/scala-stream/.gitignore
+++ b/src/scala/scala-stream/.gitignore
@@ -1 +1,2 @@
target/
+.bsp/
diff --git a/src/scala/scala-stream/.scalafmt.conf b/src/scala/scala-stream/.scalafmt.conf
index 8c7d0c8..5d87df3 100644
--- a/src/scala/scala-stream/.scalafmt.conf
+++ b/src/scala/scala-stream/.scalafmt.conf
@@ -1,4 +1,4 @@
-version = "3.0.0-RC2"
+version = "3.7.14"
runner.dialect = scala3
style = defaultWithAlign
diff --git a/src/scala/scala-stream/build.sbt b/src/scala/scala-stream/build.sbt
index 49164f6..b13fda3 100644
--- a/src/scala/scala-stream/build.sbt
+++ b/src/scala/scala-stream/build.sbt
@@ -3,7 +3,7 @@ lazy val mainCls = Some("scalastream.App")
lazy val root = (project in file("."))
.enablePlugins(NativeImagePlugin)
.settings(
- scalaVersion := "3.0.0",
+ scalaVersion := "3.3.1",
version := "4.0",
organization := "uk.ac.bristol.uob-hpc",
organizationName := "University of Bristol",
@@ -11,6 +11,11 @@ lazy val root = (project in file("."))
assembly / mainClass := mainCls,
scalacOptions ~= filterConsoleScalacOptions,
assembly / assemblyJarName := "scala-stream.jar",
+ assembly / assemblyMergeStrategy := {
+ case PathList("module-info.class") => MergeStrategy.discard
+ case PathList("META-INF", "versions", xs @ _, "module-info.class") => MergeStrategy.discard
+ case x => (ThisBuild / assemblyMergeStrategy).value(x)
+ },
nativeImageOptions := Seq(
"--no-fallback",
"-H:ReflectionConfigurationFiles=../../reflect-config.json"
@@ -22,8 +27,8 @@ lazy val root = (project in file("."))
// Lazy val implementation in Scala 3 triggers an exception in nativeImage, use 2_13 for arg parsing for now otherwise we can't get to the benchmarking part
("com.github.scopt" %% "scopt" % "4.0.1").cross(CrossVersion.for3Use2_13),
// par also uses lazy val at some point, so it doesn't work in nativeImage
- "org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.3",
- "net.openhft" % "affinity" % "3.21ea1",
- "org.slf4j" % "slf4j-simple" % "1.7.30" // for affinity
+ "org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.4",
+ "net.openhft" % "affinity" % "3.23.2",
+ "org.slf4j" % "slf4j-simple" % "2.0.5" // for affinity
)
)
diff --git a/src/scala/scala-stream/project/build.properties b/src/scala/scala-stream/project/build.properties
index 19479ba..875b706 100644
--- a/src/scala/scala-stream/project/build.properties
+++ b/src/scala/scala-stream/project/build.properties
@@ -1 +1 @@
-sbt.version=1.5.2
+sbt.version=1.9.2
diff --git a/src/scala/scala-stream/project/plugins.sbt b/src/scala/scala-stream/project/plugins.sbt
index 2c82902..35a00f0 100644
--- a/src/scala/scala-stream/project/plugins.sbt
+++ b/src/scala/scala-stream/project/plugins.sbt
@@ -1,6 +1,6 @@
addSbtPlugin("com.timushev.sbt" % "sbt-updates" % "0.5.3")
-addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.1.17")
+addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.1.20")
addSbtPlugin("org.scalameta" % "sbt-native-image" % "0.3.0")
-addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.15.0")
+addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.3")
addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.9.27")
-addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.2")
+addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.3")
diff --git a/src/scala/scala-stream/src/main/scala/scalastream/ScalaStream.scala b/src/scala/scala-stream/src/main/scala/scalastream/ScalaStream.scala
index 9c011a6..888ba7c 100644
--- a/src/scala/scala-stream/src/main/scala/scalastream/ScalaStream.scala
+++ b/src/scala/scala-stream/src/main/scala/scalastream/ScalaStream.scala
@@ -14,6 +14,7 @@ transparent trait ScalaStream[@specialized(Float, Double) A]:
def config: Config[A]
def initArrays(): Unit
+ def readArrays(): Unit = ()
def copy(): Unit
def mul(): Unit
def add(): Unit
@@ -27,6 +28,8 @@ transparent trait ScalaStream[@specialized(Float, Double) A]:
val end = System.nanoTime()
FiniteDuration(end - start, TimeUnit.NANOSECONDS) -> r
+ inline def runInitArrays(): FiniteDuration = timed(initArrays())._1
+ inline def runReadArrays(): FiniteDuration = timed(readArrays())._1
inline def runAll(times: Int)(using Fractional[A]): (Timings[Vector[FiniteDuration]], A) =
val copy = ArrayBuffer.fill[FiniteDuration](times)(Duration.Zero)
val mul = ArrayBuffer.fill[FiniteDuration](times)(Duration.Zero)
@@ -62,7 +65,6 @@ transparent trait ScalaStream[@specialized(Float, Double) A]:
def data(): Data[A]
-
trait Fractional[@specialized(Double, Float) A]:
def toFractional(f: Float): A
def toFractional(f: Double): A
@@ -77,13 +79,13 @@ trait Fractional[@specialized(Double, Float) A]:
extension (x: Int) inline def fractional = toFractional(x.toFloat)
extension (x: Long) inline def fractional = toFractional(x.toDouble)
extension (x: A)
- inline def +(y: A) = add(x, y)
- inline def -(y: A) = sub(x, y)
- inline def *(y: A) = mul(x, y)
- inline def /(y: A) = div(x, y)
- inline def >(y: A) = compare(x, y) > 0
- inline def <(y: A) = compare(x, y) < 0
- inline def abs_ = abs(x)
+ inline def +(y: A) = add(x, y)
+ inline def -(y: A) = sub(x, y)
+ inline def *(y: A) = mul(x, y)
+ inline def /(y: A) = div(x, y)
+ inline def >(y: A) = compare(x, y) > 0
+ inline def <(y: A) = compare(x, y) < 0
+ inline def abs_ = abs(x)
end Fractional
given FloatFractional: Fractional[Float] with
@@ -204,7 +206,7 @@ object App:
validateXs("c", vec.c, goldC)
dotSum.foreach { sum =>
- val goldSum = (goldA * goldB) * (config.options.arraysize).fractional
+ val goldSum = (goldA * goldB) * config.options.arraysize.fractional
val error = ((sum - goldSum) / goldSum).abs_
if error > 1.fractional / 100000000.fractional then
Console.err.println(
@@ -238,10 +240,10 @@ object App:
)
println(s"Running ${config.benchmark match {
- case Benchmark.All => "kernels"
- case Benchmark.Triad => "triad"
- case Benchmark.NStream => "nstream"
- }} ${opt.numtimes} times")
+ case Benchmark.All => "kernels"
+ case Benchmark.Triad => "triad"
+ case Benchmark.NStream => "nstream"
+ }} ${opt.numtimes} times")
if config.benchmark == Benchmark.Triad then println(s"Number of elements: ${opt.arraysize}")
@@ -288,11 +290,38 @@ object App:
println(header.map(_._1.padTo(padding, ' ')).mkString(sep))
println(rows.map(_.map(_._2.padTo(padding, ' ')).mkString(sep)).mkString("\n"))
+ def showInit(init: FiniteDuration, read: FiniteDuration): Unit = {
+ val setup =
+ Vector(("Init", init.seconds, 3 * arrayBytes), ("Read", read.seconds, 3 * arrayBytes))
+ if opt.csv then
+ tabulate(
+ setup.map((name, elapsed, totalBytes) =>
+ Vector(
+ "phase" -> name,
+ "n_elements" -> opt.arraysize.toString,
+ "sizeof" -> arrayBytes.toString,
+ s"max_m${if opt.mibibytes then "i" else ""}bytes_per_sec" ->
+ (megaScale * totalBytes.toDouble / elapsed).toString,
+ "runtime" -> elapsed.toString
+ )
+ ): _*
+ )
+ else
+ for (name, elapsed, totalBytes) <- setup do
+ println(
+ f"$name: $elapsed%.5f s (=${megaScale * totalBytes.toDouble / elapsed}%.5f M${
+ if opt.mibibytes then "i" else ""
+ }Bytes/sec)"
+ )
+ }
+
val stream = mkStream(config)
- stream.initArrays()
+ val init = stream.runInitArrays()
config.benchmark match
case Benchmark.All =>
val (results, sum) = stream.runAll(opt.numtimes)
+ val read = stream.runReadArrays()
+ showInit(init, read)
validate(stream.data(), config, Some(sum))
tabulate(
mkRow(results.copy, "Copy", 2 * arrayBytes),
@@ -303,10 +332,14 @@ object App:
)
case Benchmark.NStream =>
val result = stream.runNStream(opt.numtimes)
+ val read = stream.runReadArrays()
+ showInit(init, read)
validate(stream.data(), config)
tabulate(mkRow(result, "Nstream", 4 * arrayBytes))
case Benchmark.Triad =>
- val results = stream.runTriad(opt.numtimes)
+ val results = stream.runTriad(opt.numtimes)
+ val read = stream.runReadArrays()
+ showInit(init, read)
val totalBytes = 3 * arrayBytes * opt.numtimes
val bandwidth = megaScale * (totalBytes / results.seconds)
println(f"Runtime (seconds): ${results.seconds}%.5f")