Merge branch 'time_init_read' into develop

This commit is contained in:
Tom Lin 2023-10-07 15:09:52 +01:00
commit a27abfe296
28 changed files with 435 additions and 136 deletions

View File

@ -7,6 +7,8 @@ All notable changes to this project will be documented in this file.
- Thrust managed memory. - Thrust managed memory.
- HIP managed memory. - HIP managed memory.
- New implementation using SYCL2020 USM (sycl2020-usm) and renamed original `sycl2020` to `sycl2020-acc`. - New implementation using SYCL2020 USM (sycl2020-usm) and renamed original `sycl2020` to `sycl2020-acc`.
- Data initialisation and read-back timing for all models, including Java, Scala, Julia, and Rust
- Add support for the latest Aparapi (3.0.0) and TornadoVM (0.15.x) for Java
### Changed ### Changed
- RAJA CUDA CMake build issues resolved. - RAJA CUDA CMake build issues resolved.
@ -17,6 +19,7 @@ All notable changes to this project will be documented in this file.
- Number of thread-blocks in CUDA dot kernel implementation changed to 1024. - Number of thread-blocks in CUDA dot kernel implementation changed to 1024.
- Fix compatibility of `sycl2020` (now `sycl2020-acc`) with hipSYCL. - Fix compatibility of `sycl2020` (now `sycl2020-acc`) with hipSYCL.
- Bumped Julia compat to 1.9 - Bumped Julia compat to 1.9
- Bumped Scala to 3.3.1
- Bumped Rust to 1.74.0-nightly (13e6f24b9 2023-09-23) - Bumped Rust to 1.74.0-nightly (13e6f24b9 2023-09-23)

View File

@ -12,7 +12,7 @@
<properties> <properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<junit.version>5.7.2</junit.version> <junit.version>5.9.2</junit.version>
</properties> </properties>
<repositories> <repositories>
@ -27,19 +27,19 @@
<dependency> <dependency>
<groupId>com.beust</groupId> <groupId>com.beust</groupId>
<artifactId>jcommander</artifactId> <artifactId>jcommander</artifactId>
<version>1.81</version> <version>1.82</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>tornado</groupId> <groupId>tornado</groupId>
<artifactId>tornado-api</artifactId> <artifactId>tornado-api</artifactId>
<version>0.9</version> <version>0.15.1</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.aparapi</groupId> <groupId>com.aparapi</groupId>
<artifactId>aparapi</artifactId> <artifactId>aparapi</artifactId>
<version>2.0.0</version> <version>3.0.0</version>
<exclusions> <exclusions>
<!-- don't pull in the entire Scala ecosystem! --> <!-- don't pull in the entire Scala ecosystem! -->
<exclusion> <exclusion>

View File

@ -56,7 +56,7 @@ public abstract class JavaStream<T> {
protected abstract T dot(); protected abstract T dot();
protected abstract Data<T> data(); protected abstract Data<T> readArrays();
public static class EnumeratedStream<T> extends JavaStream<T> { public static class EnumeratedStream<T> extends JavaStream<T> {
@ -113,8 +113,8 @@ public abstract class JavaStream<T> {
} }
@Override @Override
public Data<T> data() { public Data<T> readArrays() {
return actual.data(); return actual.readArrays();
} }
} }
@ -140,6 +140,14 @@ public abstract class JavaStream<T> {
return Duration.ofNanos(end - start); return Duration.ofNanos(end - start);
} }
/** Runs {@code initArrays()} through {@code timed} and returns the {@link Duration} it reports. */
final Duration runInitArrays() {
return timed(this::initArrays);
}
/**
 * Runs {@code readArrays()} through {@code timed}.
 *
 * @return an entry whose key is the {@link Duration} reported by {@code timed} and whose value is
 *     the {@code Data<T>} produced by {@code readArrays()}
 */
final SimpleImmutableEntry<Duration, Data<T>> runReadArrays() {
return timed(this::readArrays);
}
final SimpleImmutableEntry<Timings<Duration>, T> runAll(int times) { final SimpleImmutableEntry<Timings<Duration>, T> runAll(int times) {
Timings<Duration> timings = new Timings<>(); Timings<Duration> timings = new Timings<>();
T lastSum = null; T lastSum = null;

View File

@ -128,6 +128,40 @@ public class Main {
} }
} }
/**
 * Reports how long the array initialisation and read-back phases took.
 *
 * <p>In CSV mode both phases are handed to {@code tabulateCsv} as rows; otherwise one
 * human-readable line per phase is printed to standard output.
 *
 * @param totalBytes byte count used for the bandwidth figure and for the {@code sizeof} column
 * @param megaScale multiplier applied to the bytes-per-second value before it is reported
 * @param opt options supplying {@code csv}, {@code arraysize} and {@code mibibytes}
 * @param init duration of the initialisation phase
 * @param read duration of the read-back phase
 */
@SuppressWarnings("unchecked")
static void showInit(
    int totalBytes, double megaScale, Options opt, Duration init, Duration read) {
  // "i" turns "M...Bytes" into "Mi...Bytes" in both output formats.
  final String binaryInfix = opt.mibibytes ? "i" : "";
  final List<Entry<String, Double>> phases =
      Arrays.asList(
          new SimpleImmutableEntry<>("Init", durationToSeconds(init)),
          new SimpleImmutableEntry<>("Read", durationToSeconds(read)));

  if (!opt.csv) {
    // Plain-text mode: one line per phase.
    for (Entry<String, Double> phase : phases) {
      final double seconds = phase.getValue();
      System.out.printf(
          "%s: %.5f s (%.5f M%sBytes/sec)%n",
          phase.getKey(), seconds, megaScale * (double) totalBytes / seconds, binaryInfix);
    }
    return;
  }

  // CSV mode: each phase becomes a list of (column, value) pairs. The raw List[] is what the
  // varargs call receives, hence the unchecked suppression on this method.
  final String rateColumn = "max_m" + binaryInfix + "bytes_per_sec";
  tabulateCsv(
      true,
      phases.stream()
          .map(
              phase ->
                  Arrays.asList(
                      new SimpleImmutableEntry<>("function", phase.getKey()),
                      new SimpleImmutableEntry<>("n_elements", opt.arraysize + ""),
                      new SimpleImmutableEntry<>("sizeof", totalBytes + ""),
                      new SimpleImmutableEntry<>(
                          rateColumn, (megaScale * (double) totalBytes / phase.getValue()) + ""),
                      new SimpleImmutableEntry<>("runtime", phase.getValue() + "")))
          .toArray(List[]::new));
}
static <T extends Number> boolean run( static <T extends Number> boolean run(
String name, Config<T> config, Function<Config<T>, JavaStream<T>> mkStream) { String name, Config<T> config, Function<Config<T>, JavaStream<T>> mkStream) {
@ -183,13 +217,15 @@ public class Main {
JavaStream<T> stream = mkStream.apply(config); JavaStream<T> stream = mkStream.apply(config);
stream.initArrays(); Duration init = stream.runInitArrays();
final boolean ok; final boolean ok;
switch (config.benchmark) { switch (config.benchmark) {
case ALL: case ALL:
{
Entry<Timings<Duration>, T> results = stream.runAll(opt.numtimes); Entry<Timings<Duration>, T> results = stream.runAll(opt.numtimes);
ok = checkSolutions(stream.data(), config, Optional.of(results.getValue())); SimpleImmutableEntry<Duration, Data<T>> read = stream.runReadArrays();
showInit(totalBytes, megaScale, opt, init, read.getKey());
ok = checkSolutions(read.getValue(), config, Optional.of(results.getValue()));
Timings<Duration> timings = results.getKey(); Timings<Duration> timings = results.getKey();
tabulateCsv( tabulateCsv(
opt.csv, opt.csv,
@ -199,19 +235,28 @@ public class Main {
mkCsvRow(timings.triad, "Triad", 3 * arrayBytes, megaScale, opt), mkCsvRow(timings.triad, "Triad", 3 * arrayBytes, megaScale, opt),
mkCsvRow(timings.dot, "Dot", 2 * arrayBytes, megaScale, opt)); mkCsvRow(timings.dot, "Dot", 2 * arrayBytes, megaScale, opt));
break; break;
}
case NSTREAM: case NSTREAM:
{
List<Duration> nstreamResults = stream.runNStream(opt.numtimes); List<Duration> nstreamResults = stream.runNStream(opt.numtimes);
ok = checkSolutions(stream.data(), config, Optional.empty()); SimpleImmutableEntry<Duration, Data<T>> read = stream.runReadArrays();
showInit(totalBytes, megaScale, opt, init, read.getKey());
ok = checkSolutions(read.getValue(), config, Optional.empty());
tabulateCsv(opt.csv, mkCsvRow(nstreamResults, "Nstream", 4 * arrayBytes, megaScale, opt)); tabulateCsv(opt.csv, mkCsvRow(nstreamResults, "Nstream", 4 * arrayBytes, megaScale, opt));
break; break;
}
case TRIAD: case TRIAD:
{
Duration triadResult = stream.runTriad(opt.numtimes); Duration triadResult = stream.runTriad(opt.numtimes);
ok = checkSolutions(stream.data(), config, Optional.empty()); SimpleImmutableEntry<Duration, Data<T>> read = stream.runReadArrays();
showInit(totalBytes, megaScale, opt, init, read.getKey());
ok = checkSolutions(read.getValue(), config, Optional.empty());
int triadTotalBytes = 3 * arrayBytes * opt.numtimes; int triadTotalBytes = 3 * arrayBytes * opt.numtimes;
double bandwidth = megaScale * (triadTotalBytes / durationToSeconds(triadResult)); double bandwidth = megaScale * (triadTotalBytes / durationToSeconds(triadResult));
System.out.printf("Runtime (seconds): %.5f", durationToSeconds(triadResult)); System.out.printf("Runtime (seconds): %.5f", durationToSeconds(triadResult));
System.out.printf("Bandwidth (%s/s): %.3f ", gigaSuffix, bandwidth); System.out.printf("Bandwidth (%s/s): %.3f ", gigaSuffix, bandwidth);
break; break;
}
default: default:
throw new AssertionError(); throw new AssertionError();
} }

View File

@ -122,7 +122,7 @@ public final class AparapiStreams {
} }
@Override @Override
public Data<T> data() { public Data<T> readArrays() {
return kernels.syncAndDispose(); return kernels.syncAndDispose();
} }
} }

View File

@ -86,7 +86,7 @@ final class GenericPlainStream<T extends Number> extends JavaStream<T> {
} }
@Override @Override
public Data<T> data() { public Data<T> readArrays() {
return new Data<>(a, b, c); return new Data<>(a, b, c);
} }
} }

View File

@ -80,7 +80,7 @@ final class GenericStream<T extends Number> extends JavaStream<T> {
} }
@Override @Override
public Data<T> data() { public Data<T> readArrays() {
return new Data<>(a, b, c); return new Data<>(a, b, c);
} }
} }

View File

@ -78,7 +78,7 @@ final class SpecialisedDoubleStream extends JavaStream<Double> {
} }
@Override @Override
public Data<Double> data() { public Data<Double> readArrays() {
return new Data<>(boxed(a), boxed(b), boxed(c)); return new Data<>(boxed(a), boxed(b), boxed(c));
} }
} }

View File

@ -78,7 +78,7 @@ final class SpecialisedFloatStream extends JavaStream<Float> {
} }
@Override @Override
public Data<Float> data() { public Data<Float> readArrays() {
return new Data<>(boxed(a), boxed(b), boxed(c)); return new Data<>(boxed(a), boxed(b), boxed(c));
} }
} }

View File

@ -78,7 +78,7 @@ final class SpecialisedPlainDoubleStream extends JavaStream<Double> {
} }
@Override @Override
public Data<Double> data() { public Data<Double> readArrays() {
return new Data<>(boxed(a), boxed(b), boxed(c)); return new Data<>(boxed(a), boxed(b), boxed(c));
} }
} }

View File

@ -78,7 +78,7 @@ final class SpecialisedPlainFloatStream extends JavaStream<Float> {
} }
@Override @Override
public Data<Float> data() { public Data<Float> readArrays() {
return new Data<>(boxed(a), boxed(b), boxed(c)); return new Data<>(boxed(a), boxed(b), boxed(c));
} }
} }

View File

@ -4,8 +4,8 @@ import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import javastream.JavaStream; import javastream.JavaStream;
import javastream.Main.Config; import javastream.Main.Config;
import uk.ac.manchester.tornado.api.TaskSchedule; import uk.ac.manchester.tornado.api.TornadoExecutionPlan;
import uk.ac.manchester.tornado.api.TornadoRuntimeCI; import uk.ac.manchester.tornado.api.TornadoRuntimeInterface;
import uk.ac.manchester.tornado.api.common.TornadoDevice; import uk.ac.manchester.tornado.api.common.TornadoDevice;
import uk.ac.manchester.tornado.api.runtime.TornadoRuntime; import uk.ac.manchester.tornado.api.runtime.TornadoRuntime;
@ -13,18 +13,18 @@ abstract class GenericTornadoVMStream<T> extends JavaStream<T> {
protected final TornadoDevice device; protected final TornadoDevice device;
protected TaskSchedule copyTask; protected TornadoExecutionPlan copyTask;
protected TaskSchedule mulTask; protected TornadoExecutionPlan mulTask;
protected TaskSchedule addTask; protected TornadoExecutionPlan addTask;
protected TaskSchedule triadTask; protected TornadoExecutionPlan triadTask;
protected TaskSchedule nstreamTask; protected TornadoExecutionPlan nstreamTask;
protected TaskSchedule dotTask; protected TornadoExecutionPlan dotTask;
GenericTornadoVMStream(Config<T> config) { GenericTornadoVMStream(Config<T> config) {
super(config); super(config);
try { try {
TornadoRuntimeCI runtime = TornadoRuntime.getTornadoRuntime(); TornadoRuntimeInterface runtime = TornadoRuntime.getTornadoRuntime();
List<TornadoDevice> devices = TornadoVMStreams.enumerateDevices(runtime); List<TornadoDevice> devices = TornadoVMStreams.enumerateDevices(runtime);
device = devices.get(config.options.device); device = devices.get(config.options.device);
@ -42,10 +42,6 @@ abstract class GenericTornadoVMStream<T> extends JavaStream<T> {
} }
} }
protected static TaskSchedule mkSchedule() {
return new TaskSchedule("");
}
@Override @Override
public List<String> listDevices() { public List<String> listDevices() {
return TornadoVMStreams.enumerateDevices(TornadoRuntime.getTornadoRuntime()).stream() return TornadoVMStreams.enumerateDevices(TornadoRuntime.getTornadoRuntime()).stream()
@ -55,12 +51,12 @@ abstract class GenericTornadoVMStream<T> extends JavaStream<T> {
@Override @Override
public void initArrays() { public void initArrays() {
this.copyTask.warmup(); this.copyTask.withWarmUp();
this.mulTask.warmup(); this.mulTask.withWarmUp();
this.addTask.warmup(); this.addTask.withWarmUp();
this.triadTask.warmup(); this.triadTask.withWarmUp();
this.nstreamTask.warmup(); this.nstreamTask.withWarmUp();
this.dotTask.warmup(); this.dotTask.withWarmUp();
} }
@Override @Override

View File

@ -2,8 +2,11 @@ package javastream.tornadovm;
import java.util.Arrays; import java.util.Arrays;
import javastream.Main.Config; import javastream.Main.Config;
import uk.ac.manchester.tornado.api.TaskGraph;
import uk.ac.manchester.tornado.api.TornadoExecutionPlan;
import uk.ac.manchester.tornado.api.annotations.Parallel; import uk.ac.manchester.tornado.api.annotations.Parallel;
import uk.ac.manchester.tornado.api.annotations.Reduce; import uk.ac.manchester.tornado.api.annotations.Reduce;
import uk.ac.manchester.tornado.api.enums.DataTransferMode;
final class SpecialisedDouble extends GenericTornadoVMStream<Double> { final class SpecialisedDouble extends GenericTornadoVMStream<Double> {
@ -49,7 +52,7 @@ final class SpecialisedDouble extends GenericTornadoVMStream<Double> {
private final double[] a, b, c; private final double[] a, b, c;
private final double[] dotSum; private final double[] dotSum;
@SuppressWarnings({"PrimitiveArrayArgumentToVarargsMethod", "DuplicatedCode"}) @SuppressWarnings({"DuplicatedCode"})
SpecialisedDouble(Config<Double> config) { SpecialisedDouble(Config<Double> config) {
super(config); super(config);
final int size = config.options.arraysize; final int size = config.options.arraysize;
@ -58,12 +61,43 @@ final class SpecialisedDouble extends GenericTornadoVMStream<Double> {
b = new double[size]; b = new double[size];
c = new double[size]; c = new double[size];
dotSum = new double[1]; dotSum = new double[1];
this.copyTask = mkSchedule().task("", SpecialisedDouble::copy, size, a, c); this.copyTask =
this.mulTask = mkSchedule().task("", SpecialisedDouble::mul, size, b, c, scalar); new TornadoExecutionPlan(
this.addTask = mkSchedule().task("", SpecialisedDouble::add, size, a, b, c); new TaskGraph("copy")
this.triadTask = mkSchedule().task("", SpecialisedDouble::triad, size, a, b, c, scalar); .task("copy", SpecialisedDouble::copy, size, a, c)
this.nstreamTask = mkSchedule().task("", SpecialisedDouble::nstream, size, a, b, c, scalar); .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, c)
this.dotTask = mkSchedule().task("", SpecialisedDouble::dot_, a, b, dotSum).streamOut(dotSum); .snapshot());
this.mulTask =
new TornadoExecutionPlan(
new TaskGraph("mul")
.task("mul", SpecialisedDouble::mul, size, b, c, scalar)
.transferToDevice(DataTransferMode.FIRST_EXECUTION, b, c)
.snapshot());
this.addTask =
new TornadoExecutionPlan(
new TaskGraph("add")
.task("add", SpecialisedDouble::add, size, a, b, c)
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
.snapshot());
this.triadTask =
new TornadoExecutionPlan(
new TaskGraph("triad")
.task("triad", SpecialisedDouble::triad, size, a, b, c, scalar)
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
.snapshot());
this.nstreamTask =
new TornadoExecutionPlan(
new TaskGraph("nstream")
.task("nstream", SpecialisedDouble::nstream, size, a, b, c, scalar)
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
.snapshot());
this.dotTask =
new TornadoExecutionPlan(
new TaskGraph("dot")
.task("dot", SpecialisedDouble::dot_, a, b, dotSum)
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b)
.transferToHost(DataTransferMode.EVERY_EXECUTION, new Object[] {dotSum})
.snapshot());
} }
@Override @Override
@ -72,7 +106,7 @@ final class SpecialisedDouble extends GenericTornadoVMStream<Double> {
Arrays.fill(a, config.initA); Arrays.fill(a, config.initA);
Arrays.fill(b, config.initB); Arrays.fill(b, config.initB);
Arrays.fill(c, config.initC); Arrays.fill(c, config.initC);
TornadoVMStreams.xferToDevice(device, a, b, c); TornadoVMStreams.allocAndXferToDevice(device, a, b, c);
} }
@Override @Override
@ -81,7 +115,7 @@ final class SpecialisedDouble extends GenericTornadoVMStream<Double> {
} }
@Override @Override
public Data<Double> data() { public Data<Double> readArrays() {
TornadoVMStreams.xferFromDevice(device, a, b, c); TornadoVMStreams.xferFromDevice(device, a, b, c);
return new Data<>(boxed(a), boxed(b), boxed(c)); return new Data<>(boxed(a), boxed(b), boxed(c));
} }

View File

@ -2,8 +2,11 @@ package javastream.tornadovm;
import java.util.Arrays; import java.util.Arrays;
import javastream.Main.Config; import javastream.Main.Config;
import uk.ac.manchester.tornado.api.TaskGraph;
import uk.ac.manchester.tornado.api.TornadoExecutionPlan;
import uk.ac.manchester.tornado.api.annotations.Parallel; import uk.ac.manchester.tornado.api.annotations.Parallel;
import uk.ac.manchester.tornado.api.annotations.Reduce; import uk.ac.manchester.tornado.api.annotations.Reduce;
import uk.ac.manchester.tornado.api.enums.DataTransferMode;
final class SpecialisedFloat extends GenericTornadoVMStream<Float> { final class SpecialisedFloat extends GenericTornadoVMStream<Float> {
@ -49,7 +52,7 @@ final class SpecialisedFloat extends GenericTornadoVMStream<Float> {
private final float[] a, b, c; private final float[] a, b, c;
private final float[] dotSum; private final float[] dotSum;
@SuppressWarnings({"PrimitiveArrayArgumentToVarargsMethod", "DuplicatedCode"}) @SuppressWarnings({"DuplicatedCode"})
SpecialisedFloat(Config<Float> config) { SpecialisedFloat(Config<Float> config) {
super(config); super(config);
final int size = config.options.arraysize; final int size = config.options.arraysize;
@ -58,12 +61,43 @@ final class SpecialisedFloat extends GenericTornadoVMStream<Float> {
b = new float[size]; b = new float[size];
c = new float[size]; c = new float[size];
dotSum = new float[1]; dotSum = new float[1];
this.copyTask = mkSchedule().task("", SpecialisedFloat::copy, size, a, c); this.copyTask =
this.mulTask = mkSchedule().task("", SpecialisedFloat::mul, size, b, c, scalar); new TornadoExecutionPlan(
this.addTask = mkSchedule().task("", SpecialisedFloat::add, size, a, b, c); new TaskGraph("copy")
this.triadTask = mkSchedule().task("", SpecialisedFloat::triad, size, a, b, c, scalar); .task("copy", SpecialisedFloat::copy, size, a, c)
this.nstreamTask = mkSchedule().task("", SpecialisedFloat::nstream, size, a, b, c, scalar); .transferToDevice(DataTransferMode.FIRST_EXECUTION, a, c)
this.dotTask = mkSchedule().task("", SpecialisedFloat::dot_, a, b, dotSum).streamOut(dotSum); .snapshot());
this.mulTask =
new TornadoExecutionPlan(
new TaskGraph("mul")
.task("mul", SpecialisedFloat::mul, size, b, c, scalar)
.transferToDevice(DataTransferMode.FIRST_EXECUTION, b, c)
.snapshot());
this.addTask =
new TornadoExecutionPlan(
new TaskGraph("add")
.task("add", SpecialisedFloat::add, size, a, b, c)
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
.snapshot());
this.triadTask =
new TornadoExecutionPlan(
new TaskGraph("triad")
.task("triad", SpecialisedFloat::triad, size, a, b, c, scalar)
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
.snapshot());
this.nstreamTask =
new TornadoExecutionPlan(
new TaskGraph("nstream")
.task("nstream", SpecialisedFloat::nstream, size, a, b, c, scalar)
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
.snapshot());
this.dotTask =
new TornadoExecutionPlan(
new TaskGraph("dot")
.task("dot", SpecialisedFloat::dot_, a, b, dotSum)
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b)
.transferToHost(DataTransferMode.EVERY_EXECUTION, new Object[] {dotSum})
.snapshot());
} }
@Override @Override
@ -72,7 +106,7 @@ final class SpecialisedFloat extends GenericTornadoVMStream<Float> {
Arrays.fill(a, config.initA); Arrays.fill(a, config.initA);
Arrays.fill(b, config.initB); Arrays.fill(b, config.initB);
Arrays.fill(c, config.initC); Arrays.fill(c, config.initC);
TornadoVMStreams.xferToDevice(device, a, b, c); TornadoVMStreams.allocAndXferToDevice(device, a, b, c);
} }
@Override @Override
@ -81,7 +115,7 @@ final class SpecialisedFloat extends GenericTornadoVMStream<Float> {
} }
@Override @Override
public Data<Float> data() { public Data<Float> readArrays() {
TornadoVMStreams.xferFromDevice(device, a, b, c); TornadoVMStreams.xferFromDevice(device, a, b, c);
return new Data<>(boxed(a), boxed(b), boxed(c)); return new Data<>(boxed(a), boxed(b), boxed(c));
} }

View File

@ -1,36 +1,46 @@
package javastream.tornadovm; package javastream.tornadovm;
import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.function.Function; import java.util.function.Function;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.IntStream; import java.util.stream.IntStream;
import javastream.JavaStream; import javastream.JavaStream;
import javastream.Main.Config; import javastream.Main.Config;
import uk.ac.manchester.tornado.api.TornadoRuntimeCI; import uk.ac.manchester.tornado.api.TornadoRuntimeInterface;
import uk.ac.manchester.tornado.api.common.Event;
import uk.ac.manchester.tornado.api.common.TornadoDevice; import uk.ac.manchester.tornado.api.common.TornadoDevice;
import uk.ac.manchester.tornado.api.mm.TornadoGlobalObjectState; import uk.ac.manchester.tornado.api.memory.TornadoDeviceObjectState;
import uk.ac.manchester.tornado.api.memory.TornadoGlobalObjectState;
import uk.ac.manchester.tornado.api.runtime.TornadoRuntime; import uk.ac.manchester.tornado.api.runtime.TornadoRuntime;
public final class TornadoVMStreams { public final class TornadoVMStreams {
private TornadoVMStreams() {} private TornadoVMStreams() {}
static void xferToDevice(TornadoDevice device, Object... xs) { static void allocAndXferToDevice(TornadoDevice device, Object... xs) {
for (Object x : xs) { for (Object x : xs) {
TornadoGlobalObjectState state = TornadoRuntime.getTornadoRuntime().resolveObject(x); TornadoGlobalObjectState state = TornadoRuntime.getTornadoRuntime().resolveObject(x);
device.allocateObjects(
new Object[] {x}, 0, new TornadoDeviceObjectState[] {state.getDeviceState(device)});
List<Integer> writeEvent = device.ensurePresent(x, state.getDeviceState(device), null, 0, 0); List<Integer> writeEvent = device.ensurePresent(x, state.getDeviceState(device), null, 0, 0);
if (writeEvent != null) writeEvent.forEach(e -> device.resolveEvent(e).waitOn()); if (writeEvent != null) writeEvent.forEach(e -> device.resolveEvent(e).waitOn());
} }
} }
static void xferFromDevice(TornadoDevice device, Object... xs) { static void xferFromDevice(TornadoDevice device, Object... xs) {
for (Object x : xs) { Arrays.stream(xs)
.map(
x -> {
TornadoGlobalObjectState state = TornadoRuntime.getTornadoRuntime().resolveObject(x); TornadoGlobalObjectState state = TornadoRuntime.getTornadoRuntime().resolveObject(x);
device.resolveEvent(device.streamOut(x, 0, state.getDeviceState(device), null)).waitOn(); return device.resolveEvent(
} device.streamOut(x, 0, state.getDeviceState(device), null));
})
.collect(Collectors.toList())
.forEach(Event::waitOn);
} }
static List<TornadoDevice> enumerateDevices(TornadoRuntimeCI runtime) { static List<TornadoDevice> enumerateDevices(TornadoRuntimeInterface runtime) {
return IntStream.range(0, runtime.getNumDrivers()) return IntStream.range(0, runtime.getNumDrivers())
.mapToObj(runtime::getDriver) .mapToObj(runtime::getDriver)
.flatMap(d -> IntStream.range(0, d.getDeviceCount()).mapToObj(d::getDevice)) .flatMap(d -> IntStream.range(0, d.getDeviceCount()).mapToObj(d::getDevice))

View File

@ -20,6 +20,18 @@ end
@enum Benchmark All Triad Nstream @enum Benchmark All Triad Nstream
# Times a single call to `init_arrays!` and returns the elapsed time in seconds.
function run_init_arrays!(data::StreamData{T,C}, context, init::Tuple{T,T,T})::Float64 where {T,C}
  seconds = @elapsed init_arrays!(data, context, init)
  return seconds
end
# Times a single call to `read_data` and returns `(elapsed seconds, data read back)`.
function run_read_data(data::StreamData{T,C}, context)::Tuple{Float64,VectorData{T}} where {T,C}
  local result
  # The assignment happens inside the timed expression so only the read itself is measured.
  seconds = @elapsed (result = read_data(data, context))
  return (seconds, result)
end
function run_all!(data::StreamData{T,C}, context, times::Int)::Tuple{Timings,T} where {T,C} function run_all!(data::StreamData{T,C}, context, times::Int)::Tuple{Timings,T} where {T,C}
timings = Timings(times) timings = Timings(times)
lastSum::T = 0 lastSum::T = 0
@ -39,11 +51,7 @@ function run_triad!(data::StreamData{T,C}, context, times::Int)::Float64 where {
end end
end end
function run_nstream!( function run_nstream!(data::StreamData{T,C}, context, times::Int)::Vector{Float64} where {T,C}
data::StreamData{T,C},
context,
times::Int,
)::Vector{Float64} where {T,C}
timings::Vector{Float64} = zeros(times) timings::Vector{Float64} = zeros(times)
for i = 1:times for i = 1:times
@inbounds timings[i] = @elapsed nstream!(data, context) @inbounds timings[i] = @elapsed nstream!(data, context)
@ -93,9 +101,7 @@ function check_solutions(
error = abs((dot - gold_sum) / gold_sum) error = abs((dot - gold_sum) / gold_sum)
failed = error > 1.0e-8 failed = error > 1.0e-8
if failed if failed
println( println("Validation failed on sum. Error $error \nSum was $dot but should be $gold_sum")
"Validation failed on sum. Error $error \nSum was $dot but should be $gold_sum",
)
end end
!failed !failed
end : true end : true
@ -175,9 +181,7 @@ function main()
ds = devices() ds = devices()
# TODO implement substring device match # TODO implement substring device match
if config.device < 1 || config.device > length(ds) if config.device < 1 || config.device > length(ds)
error( error("Device $(config.device) out of range (1..$(length(ds))), NOTE: Julia is 1-indexed")
"Device $(config.device) out of range (1..$(length(ds))), NOTE: Julia is 1-indexed",
)
else else
device = ds[config.device] device = ds[config.device]
end end
@ -257,16 +261,42 @@ function main()
end end
end end
# Reports the time taken by the initialisation and read-back phases.
#
# `init` and `read` are elapsed times in seconds. Each phase is paired with a
# byte count of `3 * array_bytes`, which is used for its bandwidth figure.
# In CSV mode the phases are passed to `tabulate` as rows; otherwise one line
# per phase is printed.
function show_init(init::Float64, read::Float64)
  setup = [("Init", init, 3 * array_bytes), ("Read", read, 3 * array_bytes)]
  if config.csv
    tabulate(
      map(
        x -> [
          ("phase", x[1]),
          ("n_elements", config.arraysize),
          ("sizeof", x[3]),
          # Use the row's own byte count: `total_bytes` is not bound in this
          # closure (it is only a loop variable in the other branch).
          ("max_m$(config.mibibytes ? "i" : "")bytes_per_sec", mega_scale * x[3] / x[2]),
          ("runtime", x[2]),
        ],
        setup,
      )...,
    )
  else
    for (name, elapsed, bytes) in setup
      println(
        "$name: $(round(elapsed; digits=5)) s (=$(round(( mega_scale * bytes) / elapsed; digits = 5)) M$(config.mibibytes ? "i" : "")Bytes/sec)",
      )
    end
  end
end
init::Tuple{type,type,type} = DefaultInit init::Tuple{type,type,type} = DefaultInit
scalar::type = DefaultScalar scalar::type = DefaultScalar
GC.enable(false) GC.enable(false)
(data, context) = make_stream(config.arraysize, scalar, device, config.csv) (data, context) = make_stream(config.arraysize, scalar, device, config.csv)
init_arrays!(data, context, init) tInit = run_init_arrays!(data, context, init)
if benchmark == All if benchmark == All
(timings, sum) = run_all!(data, context, config.numtimes) (timings, sum) = run_all!(data, context, config.numtimes)
valid = check_solutions(read_data(data, context), config.numtimes, init, benchmark, sum) (tRead, result) = run_read_data(data, context)
show_init(tInit, tRead)
valid = check_solutions(result, config.numtimes, init, benchmark, sum)
tabulate( tabulate(
mk_row(timings.copy, "Copy", 2 * array_bytes), mk_row(timings.copy, "Copy", 2 * array_bytes),
mk_row(timings.mul, "Mul", 2 * array_bytes), mk_row(timings.mul, "Mul", 2 * array_bytes),
@ -276,13 +306,15 @@ function main()
) )
elseif benchmark == Nstream elseif benchmark == Nstream
timings = run_nstream!(data, context, config.numtimes) timings = run_nstream!(data, context, config.numtimes)
valid = (tRead, result) = run_read_data(data, context)
check_solutions(read_data(data, context), config.numtimes, init, benchmark, nothing) show_init(tInit, tRead)
valid = check_solutions(result, config.numtimes, init, benchmark, nothing)
tabulate(mk_row(timings, "Nstream", 4 * array_bytes)) tabulate(mk_row(timings, "Nstream", 4 * array_bytes))
elseif benchmark == Triad elseif benchmark == Triad
elapsed = run_triad!(data, context, config.numtimes) elapsed = run_triad!(data, context, config.numtimes)
valid = (tRead, result) = run_read_data(data, context)
check_solutions(read_data(data, context), config.numtimes, init, benchmark, nothing) show_init(tInit, tRead)
valid = check_solutions(result, config.numtimes, init, benchmark, nothing)
total_bytes = 3 * array_bytes * config.numtimes total_bytes = 3 * array_bytes * config.numtimes
bandwidth = mega_scale * (total_bytes / elapsed) bandwidth = mega_scale * (total_bytes / elapsed)
println("Runtime (seconds): $(round(elapsed; digits=5))") println("Runtime (seconds): $(round(elapsed; digits=5))")
@ -290,7 +322,6 @@ function main()
else else
error("Bad benchmark $(benchmark)") error("Bad benchmark $(benchmark)")
end end
GC.enable(true) GC.enable(true)
if !valid if !valid

View File

@ -306,7 +306,9 @@ void run()
#endif #endif
auto init1 = std::chrono::high_resolution_clock::now();
stream->init_arrays(startA, startB, startC); stream->init_arrays(startA, startB, startC);
auto init2 = std::chrono::high_resolution_clock::now();
// Result of the Dot kernel, if used. // Result of the Dot kernel, if used.
T sum{}; T sum{};
@ -333,7 +335,54 @@ void run()
std::vector<T> c(ARRAY_SIZE); std::vector<T> c(ARRAY_SIZE);
auto read1 = std::chrono::high_resolution_clock::now();
stream->read_arrays(a, b, c); stream->read_arrays(a, b, c);
auto read2 = std::chrono::high_resolution_clock::now();
// Wall-clock seconds spent in init_arrays() (init1..init2) and read_arrays() (read1..read2).
// Each figure must be derived from its own pair of time points, otherwise the
// "Init" and "Read" rows below are reported under each other's labels.
auto initElapsedS = std::chrono::duration_cast<std::chrono::duration<double>>(init2 - init1).count();
auto readElapsedS = std::chrono::duration_cast<std::chrono::duration<double>>(read2 - read1).count();
// Bandwidth over 3 * sizeof(T) * ARRAY_SIZE bytes, scaled to MiB (2^-20) or MB (1e-6) per second.
auto initBWps = ((mibibytes ? std::pow(2.0, -20.0) : 1.0E-6) * (3 * sizeof(T) * ARRAY_SIZE)) / initElapsedS;
auto readBWps = ((mibibytes ? std::pow(2.0, -20.0) : 1.0E-6) * (3 * sizeof(T) * ARRAY_SIZE)) / readElapsedS;
if (output_as_csv)
{
std::cout
<< "phase" << csv_separator
<< "n_elements" << csv_separator
<< "sizeof" << csv_separator
<< ((mibibytes) ? "max_mibytes_per_sec" : "max_mbytes_per_sec") << csv_separator
<< "runtime" << std::endl;
std::cout
<< "Init" << csv_separator
<< ARRAY_SIZE << csv_separator
<< sizeof(T) << csv_separator
<< initBWps << csv_separator
<< initElapsedS << std::endl;
std::cout
<< "Read" << csv_separator
<< ARRAY_SIZE << csv_separator
<< sizeof(T) << csv_separator
<< readBWps << csv_separator
<< readElapsedS << std::endl;
}
else
{
std::cout << "Init: "
<< std::setw(7)
<< initElapsedS
<< " s (="
<< initBWps
<< (mibibytes ? " MiBytes/sec" : " MBytes/sec")
<< ")" << std::endl;
std::cout << "Read: "
<< std::setw(7)
<< readElapsedS
<< " s (="
<< readBWps
<< (mibibytes ? " MiBytes/sec" : " MBytes/sec")
<< ")" << std::endl;
}
check_solution<T>(num_times, a, b, c, sum); check_solution<T>(num_times, a, b, c, sum);
// Display timing results // Display timing results

View File

@ -54,7 +54,7 @@ use_field_init_shorthand = false
force_explicit_abi = true force_explicit_abi = true
condense_wildcard_suffixes = false condense_wildcard_suffixes = false
color = "Auto" color = "Auto"
required_version = "1.4.38" required_version = "1.6.0"
unstable_features = false unstable_features = false
disable_all_formatting = false disable_all_formatting = false
skip_children = false skip_children = false

View File

@ -174,7 +174,7 @@ where StreamData<T, D, A>: RustStream<T> {
); );
} }
stream.init_arrays(); let init = stream.run_init_arrays();
let tabulate = |xs: &Vec<Duration>, name: &str, t_size: usize| -> Vec<(&str, String)> { let tabulate = |xs: &Vec<Duration>, name: &str, t_size: usize| -> Vec<(&str, String)> {
let tail = &xs[1..]; // tail only let tail = &xs[1..]; // tail only
@ -235,10 +235,47 @@ where StreamData<T, D, A>: RustStream<T> {
}; };
}; };
// Reports the time taken by the initialisation and read-back phases.
// In CSV mode the phases are passed to `tabulate_all` as rows; otherwise one
// line per phase is printed to stdout.
let show_setup = |init: Duration, read: Duration| {
  // Both phases are accounted with the same byte count.
  let phase_bytes = 3 * array_bytes;
  let phases = vec![("Init", init.as_secs_f64()), ("Read", read.as_secs_f64())];

  if !option.csv {
    let unit = if option.mibibytes { "MiBytes/sec" } else { "MBytes/sec" };
    for (label, seconds) in phases {
      println!(
        "{}: {:.5} s (={:.5} {})",
        label,
        seconds,
        mega_scale * (phase_bytes as f64) / seconds,
        unit
      );
    }
    return;
  }

  let rate_column = if option.mibibytes { "max_mibytes_per_sec" } else { "max_mbytes_per_sec" };
  let mut rows = Vec::with_capacity(phases.len());
  for (label, seconds) in &phases {
    rows.push(vec![
      ("phase", label.to_string()),
      ("n_elements", option.arraysize.to_string()),
      ("sizeof", phase_bytes.to_string()),
      (rate_column, (mega_scale * (phase_bytes as f64) / seconds).to_string()),
      ("runtime", seconds.to_string()),
    ]);
  }
  tabulate_all(rows);
};
let solutions_correct = match benchmark { let solutions_correct = match benchmark {
Benchmark::All => { Benchmark::All => {
let (results, sum) = stream.run_all(option.numtimes); let (results, sum) = stream.run_all(option.numtimes);
stream.read_arrays(); let read = stream.run_read_arrays();
show_setup(init, read);
let correct = check_solution(benchmark, option.numtimes, &stream, Some(sum)); let correct = check_solution(benchmark, option.numtimes, &stream, Some(sum));
tabulate_all(vec![ tabulate_all(vec![
tabulate(&results.copy, "Copy", 2 * array_bytes), tabulate(&results.copy, "Copy", 2 * array_bytes),
@ -251,14 +288,16 @@ where StreamData<T, D, A>: RustStream<T> {
} }
Benchmark::NStream => { Benchmark::NStream => {
let results = stream.run_nstream(option.numtimes); let results = stream.run_nstream(option.numtimes);
stream.read_arrays(); let read = stream.run_read_arrays();
show_setup(init, read);
let correct = check_solution(benchmark, option.numtimes, &stream, None); let correct = check_solution(benchmark, option.numtimes, &stream, None);
tabulate_all(vec![tabulate(&results, "Nstream", 4 * array_bytes)]); tabulate_all(vec![tabulate(&results, "Nstream", 4 * array_bytes)]);
correct correct
} }
Benchmark::Triad => { Benchmark::Triad => {
let results = stream.run_triad(option.numtimes); let results = stream.run_triad(option.numtimes);
stream.read_arrays(); let read = stream.run_read_arrays();
show_setup(init, read);
let correct = check_solution(benchmark, option.numtimes, &stream, None); let correct = check_solution(benchmark, option.numtimes, &stream, None);
let total_bytes = 3 * array_bytes * option.numtimes; let total_bytes = 3 * array_bytes * option.numtimes;
let bandwidth = giga_scale * (total_bytes as f64 / results.as_secs_f64()); let bandwidth = giga_scale * (total_bytes as f64 / results.as_secs_f64());

View File

@ -132,6 +132,18 @@ pub trait RustStream<T: Default> {
fn nstream(&mut self); fn nstream(&mut self);
fn dot(&mut self) -> T; fn dot(&mut self) -> T;
/// Invokes `init_arrays` through `timed` and returns the resulting `Duration`.
fn run_init_arrays(&mut self) -> Duration {
  let fill = || {
    self.init_arrays();
  };
  timed(fill)
}
/// Invokes `read_arrays` through `timed` and returns the resulting `Duration`.
fn run_read_arrays(&mut self) -> Duration {
  let read_back = || {
    self.read_arrays();
  };
  timed(read_back)
}
fn run_all(&mut self, n: usize) -> (AllTiming<Vec<Duration>>, T) { fn run_all(&mut self, n: usize) -> (AllTiming<Vec<Duration>>, T) {
let mut timings: AllTiming<Vec<Duration>> = AllTiming { let mut timings: AllTiming<Vec<Duration>> = AllTiming {
copy: vec![Duration::default(); n], copy: vec![Duration::default(); n],

View File

@ -1 +0,0 @@
{"name":"sbt","version":"1.5.2","bspVersion":"2.0.0-M5","languages":["scala"],"argv":["/usr/lib/jvm/java-11-openjdk-11.0.11.0.9-2.fc33.x86_64/bin/java","-Xms100m","-Xmx100m","-classpath","/home/tom/.local/share/JetBrains/Toolbox/apps/IDEA-U/ch-0/211.7142.45.plugins/Scala/launcher/sbt-launch.jar","xsbt.boot.Boot","-bsp","--sbt-launch-jar=/home/tom/.local/share/JetBrains/Toolbox/apps/IDEA-U/ch-0/211.7142.45.plugins/Scala/launcher/sbt-launch.jar"]}

View File

@ -1 +1,2 @@
target/ target/
.bsp/

View File

@ -1,4 +1,4 @@
version = "3.0.0-RC2" version = "3.7.14"
runner.dialect = scala3 runner.dialect = scala3
style = defaultWithAlign style = defaultWithAlign

View File

@ -3,7 +3,7 @@ lazy val mainCls = Some("scalastream.App")
lazy val root = (project in file(".")) lazy val root = (project in file("."))
.enablePlugins(NativeImagePlugin) .enablePlugins(NativeImagePlugin)
.settings( .settings(
scalaVersion := "3.0.0", scalaVersion := "3.3.1",
version := "4.0", version := "4.0",
organization := "uk.ac.bristol.uob-hpc", organization := "uk.ac.bristol.uob-hpc",
organizationName := "University of Bristol", organizationName := "University of Bristol",
@ -11,6 +11,11 @@ lazy val root = (project in file("."))
assembly / mainClass := mainCls, assembly / mainClass := mainCls,
scalacOptions ~= filterConsoleScalacOptions, scalacOptions ~= filterConsoleScalacOptions,
assembly / assemblyJarName := "scala-stream.jar", assembly / assemblyJarName := "scala-stream.jar",
assembly / assemblyMergeStrategy := {
case PathList("module-info.class") => MergeStrategy.discard
case PathList("META-INF", "versions", xs @ _, "module-info.class") => MergeStrategy.discard
case x => (ThisBuild / assemblyMergeStrategy).value(x)
},
nativeImageOptions := Seq( nativeImageOptions := Seq(
"--no-fallback", "--no-fallback",
"-H:ReflectionConfigurationFiles=../../reflect-config.json" "-H:ReflectionConfigurationFiles=../../reflect-config.json"
@ -22,8 +27,8 @@ lazy val root = (project in file("."))
// Lazy val implementation in Scala 3 triggers an exception in nativeImage, use 2_13 for arg parsing for now otherwise we can't get to the benchmarking part // Lazy val implementation in Scala 3 triggers an exception in nativeImage, use 2_13 for arg parsing for now otherwise we can't get to the benchmarking part
("com.github.scopt" %% "scopt" % "4.0.1").cross(CrossVersion.for3Use2_13), ("com.github.scopt" %% "scopt" % "4.0.1").cross(CrossVersion.for3Use2_13),
// par also uses lazy val at some point, so it doesn't work in nativeImage // par also uses lazy val at some point, so it doesn't work in nativeImage
"org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.3", "org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.4",
"net.openhft" % "affinity" % "3.21ea1", "net.openhft" % "affinity" % "3.23.2",
"org.slf4j" % "slf4j-simple" % "1.7.30" // for affinity "org.slf4j" % "slf4j-simple" % "2.0.5" // for affinity
) )
) )

View File

@ -1 +1 @@
sbt.version=1.5.2 sbt.version=1.9.2

View File

@ -1,6 +1,6 @@
addSbtPlugin("com.timushev.sbt" % "sbt-updates" % "0.5.3") addSbtPlugin("com.timushev.sbt" % "sbt-updates" % "0.5.3")
addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.1.17") addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.1.20")
addSbtPlugin("org.scalameta" % "sbt-native-image" % "0.3.0") addSbtPlugin("org.scalameta" % "sbt-native-image" % "0.3.0")
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.15.0") addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.3")
addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.9.27") addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.9.27")
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.2") addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.3")

View File

@ -14,6 +14,7 @@ transparent trait ScalaStream[@specialized(Float, Double) A]:
def config: Config[A] def config: Config[A]
def initArrays(): Unit def initArrays(): Unit
def readArrays(): Unit = ()
def copy(): Unit def copy(): Unit
def mul(): Unit def mul(): Unit
def add(): Unit def add(): Unit
@ -27,6 +28,8 @@ transparent trait ScalaStream[@specialized(Float, Double) A]:
val end = System.nanoTime() val end = System.nanoTime()
FiniteDuration(end - start, TimeUnit.NANOSECONDS) -> r FiniteDuration(end - start, TimeUnit.NANOSECONDS) -> r
/** Runs `initArrays()` under `timed` and returns only the elapsed duration. */
inline def runInitArrays(): FiniteDuration =
  val (elapsed, _) = timed(initArrays())
  elapsed
/** Runs `readArrays()` under `timed` and returns only the elapsed duration. */
inline def runReadArrays(): FiniteDuration =
  val (elapsed, _) = timed(readArrays())
  elapsed
inline def runAll(times: Int)(using Fractional[A]): (Timings[Vector[FiniteDuration]], A) = inline def runAll(times: Int)(using Fractional[A]): (Timings[Vector[FiniteDuration]], A) =
val copy = ArrayBuffer.fill[FiniteDuration](times)(Duration.Zero) val copy = ArrayBuffer.fill[FiniteDuration](times)(Duration.Zero)
val mul = ArrayBuffer.fill[FiniteDuration](times)(Duration.Zero) val mul = ArrayBuffer.fill[FiniteDuration](times)(Duration.Zero)
@ -62,7 +65,6 @@ transparent trait ScalaStream[@specialized(Float, Double) A]:
def data(): Data[A] def data(): Data[A]
trait Fractional[@specialized(Double, Float) A]: trait Fractional[@specialized(Double, Float) A]:
def toFractional(f: Float): A def toFractional(f: Float): A
def toFractional(f: Double): A def toFractional(f: Double): A
@ -204,7 +206,7 @@ object App:
validateXs("c", vec.c, goldC) validateXs("c", vec.c, goldC)
dotSum.foreach { sum => dotSum.foreach { sum =>
val goldSum = (goldA * goldB) * (config.options.arraysize).fractional val goldSum = (goldA * goldB) * config.options.arraysize.fractional
val error = ((sum - goldSum) / goldSum).abs_ val error = ((sum - goldSum) / goldSum).abs_
if error > 1.fractional / 100000000.fractional then if error > 1.fractional / 100000000.fractional then
Console.err.println( Console.err.println(
@ -288,11 +290,38 @@ object App:
println(header.map(_._1.padTo(padding, ' ')).mkString(sep)) println(header.map(_._1.padTo(padding, ' ')).mkString(sep))
println(rows.map(_.map(_._2.padTo(padding, ' ')).mkString(sep)).mkString("\n")) println(rows.map(_.map(_._2.padTo(padding, ' ')).mkString(sep)).mkString("\n"))
/** Prints the elapsed time and derived bandwidth of the init and read-back phases:
  * one CSV row per phase through `tabulate` when `opt.csv` is set, otherwise one
  * formatted line per phase.
  */
def showInit(init: FiniteDuration, read: FiniteDuration): Unit = {
  // Both phases are accounted as three arrays' worth of bytes.
  val setupBytes = 3 * arrayBytes
  // "i" turns the "M...bytes" unit names into their "Mi...bytes" form.
  val infix  = if opt.mibibytes then "i" else ""
  val phases = Vector("Init" -> init.seconds, "Read" -> read.seconds)
  if opt.csv then
    val rows = phases.map { (phase, secs) =>
      Vector(
        "phase"      -> phase,
        "n_elements" -> opt.arraysize.toString,
        "sizeof"     -> arrayBytes.toString,
        s"max_m${infix}bytes_per_sec" -> (megaScale * setupBytes.toDouble / secs).toString,
        "runtime"    -> secs.toString
      )
    }
    tabulate(rows: _*)
  else
    phases.foreach { (phase, secs) =>
      println(
        f"$phase: $secs%.5f s (=${megaScale * setupBytes.toDouble / secs}%.5f M${infix}Bytes/sec)"
      )
    }
}
val stream = mkStream(config) val stream = mkStream(config)
stream.initArrays() val init = stream.runInitArrays()
config.benchmark match config.benchmark match
case Benchmark.All => case Benchmark.All =>
val (results, sum) = stream.runAll(opt.numtimes) val (results, sum) = stream.runAll(opt.numtimes)
val read = stream.runReadArrays()
showInit(init, read)
validate(stream.data(), config, Some(sum)) validate(stream.data(), config, Some(sum))
tabulate( tabulate(
mkRow(results.copy, "Copy", 2 * arrayBytes), mkRow(results.copy, "Copy", 2 * arrayBytes),
@ -303,10 +332,14 @@ object App:
) )
case Benchmark.NStream => case Benchmark.NStream =>
val result = stream.runNStream(opt.numtimes) val result = stream.runNStream(opt.numtimes)
val read = stream.runReadArrays()
showInit(init, read)
validate(stream.data(), config) validate(stream.data(), config)
tabulate(mkRow(result, "Nstream", 4 * arrayBytes)) tabulate(mkRow(result, "Nstream", 4 * arrayBytes))
case Benchmark.Triad => case Benchmark.Triad =>
val results = stream.runTriad(opt.numtimes) val results = stream.runTriad(opt.numtimes)
val read = stream.runReadArrays()
showInit(init, read)
val totalBytes = 3 * arrayBytes * opt.numtimes val totalBytes = 3 * arrayBytes * opt.numtimes
val bandwidth = megaScale * (totalBytes / results.seconds) val bandwidth = megaScale * (totalBytes / results.seconds)
println(f"Runtime (seconds): ${results.seconds}%.5f") println(f"Runtime (seconds): ${results.seconds}%.5f")