Merge branch 'time_init_read' into develop
This commit is contained in:
commit
a27abfe296
@ -7,6 +7,8 @@ All notable changes to this project will be documented in this file.
|
|||||||
- Thrust managed memory.
|
- Thrust managed memory.
|
||||||
- HIP managed memory.
|
- HIP managed memory.
|
||||||
- New implementation using SYCL2020 USM (sycl2020-usm) and renamed original `sycl2020` to `sycl2020-acc`.
|
- New implementation using SYCL2020 USM (sycl2020-usm) and renamed original `sycl2020` to `sycl2020-acc`.
|
||||||
|
- Data initialisation and read-back timing for all models, including Java, Scala, Julia, and Rust
|
||||||
|
- Add support for the latest Aparapi (3.0.0) and TornadoVM (0.15.x) for Java
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
- RAJA CUDA CMake build issues resolved.
|
- RAJA CUDA CMake build issues resolved.
|
||||||
@ -17,6 +19,7 @@ All notable changes to this project will be documented in this file.
|
|||||||
- Number of thread-blocks in CUDA dot kernel implementation changed to 1024.
|
- Number of thread-blocks in CUDA dot kernel implementation changed to 1024.
|
||||||
- Fix compatibility of `sycl2020` (now `sycl2020-acc`) with hipSYCL.
|
- Fix compatibility of `sycl2020` (now `sycl2020-acc`) with hipSYCL.
|
||||||
- Bumped Julia compat to 1.9
|
- Bumped Julia compat to 1.9
|
||||||
|
- Bumped Scala to 3.3.1
|
||||||
- Bumped Rust to 1.74.0-nightly (13e6f24b9 2023-09-23)
|
- Bumped Rust to 1.74.0-nightly (13e6f24b9 2023-09-23)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -12,7 +12,7 @@
|
|||||||
<properties>
|
<properties>
|
||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
|
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
|
||||||
<junit.version>5.7.2</junit.version>
|
<junit.version>5.9.2</junit.version>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<repositories>
|
<repositories>
|
||||||
@ -27,19 +27,19 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.beust</groupId>
|
<groupId>com.beust</groupId>
|
||||||
<artifactId>jcommander</artifactId>
|
<artifactId>jcommander</artifactId>
|
||||||
<version>1.81</version>
|
<version>1.82</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>tornado</groupId>
|
<groupId>tornado</groupId>
|
||||||
<artifactId>tornado-api</artifactId>
|
<artifactId>tornado-api</artifactId>
|
||||||
<version>0.9</version>
|
<version>0.15.1</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.aparapi</groupId>
|
<groupId>com.aparapi</groupId>
|
||||||
<artifactId>aparapi</artifactId>
|
<artifactId>aparapi</artifactId>
|
||||||
<version>2.0.0</version>
|
<version>3.0.0</version>
|
||||||
<exclusions>
|
<exclusions>
|
||||||
<!-- don't pull in the entire Scala ecosystem! -->
|
<!-- don't pull in the entire Scala ecosystem! -->
|
||||||
<exclusion>
|
<exclusion>
|
||||||
|
|||||||
@ -56,7 +56,7 @@ public abstract class JavaStream<T> {
|
|||||||
|
|
||||||
protected abstract T dot();
|
protected abstract T dot();
|
||||||
|
|
||||||
protected abstract Data<T> data();
|
protected abstract Data<T> readArrays();
|
||||||
|
|
||||||
public static class EnumeratedStream<T> extends JavaStream<T> {
|
public static class EnumeratedStream<T> extends JavaStream<T> {
|
||||||
|
|
||||||
@ -113,8 +113,8 @@ public abstract class JavaStream<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Data<T> data() {
|
public Data<T> readArrays() {
|
||||||
return actual.data();
|
return actual.readArrays();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -140,6 +140,14 @@ public abstract class JavaStream<T> {
|
|||||||
return Duration.ofNanos(end - start);
|
return Duration.ofNanos(end - start);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final Duration runInitArrays() {
|
||||||
|
return timed(this::initArrays);
|
||||||
|
}
|
||||||
|
|
||||||
|
final SimpleImmutableEntry<Duration, Data<T>> runReadArrays() {
|
||||||
|
return timed(this::readArrays);
|
||||||
|
}
|
||||||
|
|
||||||
final SimpleImmutableEntry<Timings<Duration>, T> runAll(int times) {
|
final SimpleImmutableEntry<Timings<Duration>, T> runAll(int times) {
|
||||||
Timings<Duration> timings = new Timings<>();
|
Timings<Duration> timings = new Timings<>();
|
||||||
T lastSum = null;
|
T lastSum = null;
|
||||||
|
|||||||
@ -128,6 +128,40 @@ public class Main {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
static void showInit(
|
||||||
|
int totalBytes, double megaScale, Options opt, Duration init, Duration read) {
|
||||||
|
List<Entry<String, Double>> setup =
|
||||||
|
Arrays.asList(
|
||||||
|
new SimpleImmutableEntry<>("Init", durationToSeconds(init)),
|
||||||
|
new SimpleImmutableEntry<>("Read", durationToSeconds(read)));
|
||||||
|
if (opt.csv) {
|
||||||
|
tabulateCsv(
|
||||||
|
true,
|
||||||
|
setup.stream()
|
||||||
|
.map(
|
||||||
|
x ->
|
||||||
|
Arrays.asList(
|
||||||
|
new SimpleImmutableEntry<>("function", x.getKey()),
|
||||||
|
new SimpleImmutableEntry<>("n_elements", opt.arraysize + ""),
|
||||||
|
new SimpleImmutableEntry<>("sizeof", totalBytes + ""),
|
||||||
|
new SimpleImmutableEntry<>(
|
||||||
|
"max_m" + (opt.mibibytes ? "i" : "") + "bytes_per_sec",
|
||||||
|
((megaScale * (double) totalBytes / x.getValue())) + ""),
|
||||||
|
new SimpleImmutableEntry<>("runtime", x.getValue() + "")))
|
||||||
|
.toArray(List[]::new));
|
||||||
|
} else {
|
||||||
|
for (Entry<String, Double> e : setup) {
|
||||||
|
System.out.printf(
|
||||||
|
"%s: %.5f s (%.5f M%sBytes/sec)%n",
|
||||||
|
e.getKey(),
|
||||||
|
e.getValue(),
|
||||||
|
megaScale * (double) totalBytes / e.getValue(),
|
||||||
|
opt.mibibytes ? "i" : "");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static <T extends Number> boolean run(
|
static <T extends Number> boolean run(
|
||||||
String name, Config<T> config, Function<Config<T>, JavaStream<T>> mkStream) {
|
String name, Config<T> config, Function<Config<T>, JavaStream<T>> mkStream) {
|
||||||
|
|
||||||
@ -183,13 +217,15 @@ public class Main {
|
|||||||
|
|
||||||
JavaStream<T> stream = mkStream.apply(config);
|
JavaStream<T> stream = mkStream.apply(config);
|
||||||
|
|
||||||
stream.initArrays();
|
Duration init = stream.runInitArrays();
|
||||||
|
|
||||||
final boolean ok;
|
final boolean ok;
|
||||||
switch (config.benchmark) {
|
switch (config.benchmark) {
|
||||||
case ALL:
|
case ALL:
|
||||||
|
{
|
||||||
Entry<Timings<Duration>, T> results = stream.runAll(opt.numtimes);
|
Entry<Timings<Duration>, T> results = stream.runAll(opt.numtimes);
|
||||||
ok = checkSolutions(stream.data(), config, Optional.of(results.getValue()));
|
SimpleImmutableEntry<Duration, Data<T>> read = stream.runReadArrays();
|
||||||
|
showInit(totalBytes, megaScale, opt, init, read.getKey());
|
||||||
|
ok = checkSolutions(read.getValue(), config, Optional.of(results.getValue()));
|
||||||
Timings<Duration> timings = results.getKey();
|
Timings<Duration> timings = results.getKey();
|
||||||
tabulateCsv(
|
tabulateCsv(
|
||||||
opt.csv,
|
opt.csv,
|
||||||
@ -199,19 +235,28 @@ public class Main {
|
|||||||
mkCsvRow(timings.triad, "Triad", 3 * arrayBytes, megaScale, opt),
|
mkCsvRow(timings.triad, "Triad", 3 * arrayBytes, megaScale, opt),
|
||||||
mkCsvRow(timings.dot, "Dot", 2 * arrayBytes, megaScale, opt));
|
mkCsvRow(timings.dot, "Dot", 2 * arrayBytes, megaScale, opt));
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case NSTREAM:
|
case NSTREAM:
|
||||||
|
{
|
||||||
List<Duration> nstreamResults = stream.runNStream(opt.numtimes);
|
List<Duration> nstreamResults = stream.runNStream(opt.numtimes);
|
||||||
ok = checkSolutions(stream.data(), config, Optional.empty());
|
SimpleImmutableEntry<Duration, Data<T>> read = stream.runReadArrays();
|
||||||
|
showInit(totalBytes, megaScale, opt, init, read.getKey());
|
||||||
|
ok = checkSolutions(read.getValue(), config, Optional.empty());
|
||||||
tabulateCsv(opt.csv, mkCsvRow(nstreamResults, "Nstream", 4 * arrayBytes, megaScale, opt));
|
tabulateCsv(opt.csv, mkCsvRow(nstreamResults, "Nstream", 4 * arrayBytes, megaScale, opt));
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case TRIAD:
|
case TRIAD:
|
||||||
|
{
|
||||||
Duration triadResult = stream.runTriad(opt.numtimes);
|
Duration triadResult = stream.runTriad(opt.numtimes);
|
||||||
ok = checkSolutions(stream.data(), config, Optional.empty());
|
SimpleImmutableEntry<Duration, Data<T>> read = stream.runReadArrays();
|
||||||
|
showInit(totalBytes, megaScale, opt, init, read.getKey());
|
||||||
|
ok = checkSolutions(read.getValue(), config, Optional.empty());
|
||||||
int triadTotalBytes = 3 * arrayBytes * opt.numtimes;
|
int triadTotalBytes = 3 * arrayBytes * opt.numtimes;
|
||||||
double bandwidth = megaScale * (triadTotalBytes / durationToSeconds(triadResult));
|
double bandwidth = megaScale * (triadTotalBytes / durationToSeconds(triadResult));
|
||||||
System.out.printf("Runtime (seconds): %.5f", durationToSeconds(triadResult));
|
System.out.printf("Runtime (seconds): %.5f", durationToSeconds(triadResult));
|
||||||
System.out.printf("Bandwidth (%s/s): %.3f ", gigaSuffix, bandwidth);
|
System.out.printf("Bandwidth (%s/s): %.3f ", gigaSuffix, bandwidth);
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
throw new AssertionError();
|
throw new AssertionError();
|
||||||
}
|
}
|
||||||
|
|||||||
@ -122,7 +122,7 @@ public final class AparapiStreams {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Data<T> data() {
|
public Data<T> readArrays() {
|
||||||
return kernels.syncAndDispose();
|
return kernels.syncAndDispose();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -86,7 +86,7 @@ final class GenericPlainStream<T extends Number> extends JavaStream<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Data<T> data() {
|
public Data<T> readArrays() {
|
||||||
return new Data<>(a, b, c);
|
return new Data<>(a, b, c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -80,7 +80,7 @@ final class GenericStream<T extends Number> extends JavaStream<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Data<T> data() {
|
public Data<T> readArrays() {
|
||||||
return new Data<>(a, b, c);
|
return new Data<>(a, b, c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -78,7 +78,7 @@ final class SpecialisedDoubleStream extends JavaStream<Double> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Data<Double> data() {
|
public Data<Double> readArrays() {
|
||||||
return new Data<>(boxed(a), boxed(b), boxed(c));
|
return new Data<>(boxed(a), boxed(b), boxed(c));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -78,7 +78,7 @@ final class SpecialisedFloatStream extends JavaStream<Float> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Data<Float> data() {
|
public Data<Float> readArrays() {
|
||||||
return new Data<>(boxed(a), boxed(b), boxed(c));
|
return new Data<>(boxed(a), boxed(b), boxed(c));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -78,7 +78,7 @@ final class SpecialisedPlainDoubleStream extends JavaStream<Double> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Data<Double> data() {
|
public Data<Double> readArrays() {
|
||||||
return new Data<>(boxed(a), boxed(b), boxed(c));
|
return new Data<>(boxed(a), boxed(b), boxed(c));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -78,7 +78,7 @@ final class SpecialisedPlainFloatStream extends JavaStream<Float> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Data<Float> data() {
|
public Data<Float> readArrays() {
|
||||||
return new Data<>(boxed(a), boxed(b), boxed(c));
|
return new Data<>(boxed(a), boxed(b), boxed(c));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -4,8 +4,8 @@ import java.util.List;
|
|||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import javastream.JavaStream;
|
import javastream.JavaStream;
|
||||||
import javastream.Main.Config;
|
import javastream.Main.Config;
|
||||||
import uk.ac.manchester.tornado.api.TaskSchedule;
|
import uk.ac.manchester.tornado.api.TornadoExecutionPlan;
|
||||||
import uk.ac.manchester.tornado.api.TornadoRuntimeCI;
|
import uk.ac.manchester.tornado.api.TornadoRuntimeInterface;
|
||||||
import uk.ac.manchester.tornado.api.common.TornadoDevice;
|
import uk.ac.manchester.tornado.api.common.TornadoDevice;
|
||||||
import uk.ac.manchester.tornado.api.runtime.TornadoRuntime;
|
import uk.ac.manchester.tornado.api.runtime.TornadoRuntime;
|
||||||
|
|
||||||
@ -13,18 +13,18 @@ abstract class GenericTornadoVMStream<T> extends JavaStream<T> {
|
|||||||
|
|
||||||
protected final TornadoDevice device;
|
protected final TornadoDevice device;
|
||||||
|
|
||||||
protected TaskSchedule copyTask;
|
protected TornadoExecutionPlan copyTask;
|
||||||
protected TaskSchedule mulTask;
|
protected TornadoExecutionPlan mulTask;
|
||||||
protected TaskSchedule addTask;
|
protected TornadoExecutionPlan addTask;
|
||||||
protected TaskSchedule triadTask;
|
protected TornadoExecutionPlan triadTask;
|
||||||
protected TaskSchedule nstreamTask;
|
protected TornadoExecutionPlan nstreamTask;
|
||||||
protected TaskSchedule dotTask;
|
protected TornadoExecutionPlan dotTask;
|
||||||
|
|
||||||
GenericTornadoVMStream(Config<T> config) {
|
GenericTornadoVMStream(Config<T> config) {
|
||||||
super(config);
|
super(config);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
TornadoRuntimeCI runtime = TornadoRuntime.getTornadoRuntime();
|
TornadoRuntimeInterface runtime = TornadoRuntime.getTornadoRuntime();
|
||||||
List<TornadoDevice> devices = TornadoVMStreams.enumerateDevices(runtime);
|
List<TornadoDevice> devices = TornadoVMStreams.enumerateDevices(runtime);
|
||||||
device = devices.get(config.options.device);
|
device = devices.get(config.options.device);
|
||||||
|
|
||||||
@ -42,10 +42,6 @@ abstract class GenericTornadoVMStream<T> extends JavaStream<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static TaskSchedule mkSchedule() {
|
|
||||||
return new TaskSchedule("");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> listDevices() {
|
public List<String> listDevices() {
|
||||||
return TornadoVMStreams.enumerateDevices(TornadoRuntime.getTornadoRuntime()).stream()
|
return TornadoVMStreams.enumerateDevices(TornadoRuntime.getTornadoRuntime()).stream()
|
||||||
@ -55,12 +51,12 @@ abstract class GenericTornadoVMStream<T> extends JavaStream<T> {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void initArrays() {
|
public void initArrays() {
|
||||||
this.copyTask.warmup();
|
this.copyTask.withWarmUp();
|
||||||
this.mulTask.warmup();
|
this.mulTask.withWarmUp();
|
||||||
this.addTask.warmup();
|
this.addTask.withWarmUp();
|
||||||
this.triadTask.warmup();
|
this.triadTask.withWarmUp();
|
||||||
this.nstreamTask.warmup();
|
this.nstreamTask.withWarmUp();
|
||||||
this.dotTask.warmup();
|
this.dotTask.withWarmUp();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|||||||
@ -2,8 +2,11 @@ package javastream.tornadovm;
|
|||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import javastream.Main.Config;
|
import javastream.Main.Config;
|
||||||
|
import uk.ac.manchester.tornado.api.TaskGraph;
|
||||||
|
import uk.ac.manchester.tornado.api.TornadoExecutionPlan;
|
||||||
import uk.ac.manchester.tornado.api.annotations.Parallel;
|
import uk.ac.manchester.tornado.api.annotations.Parallel;
|
||||||
import uk.ac.manchester.tornado.api.annotations.Reduce;
|
import uk.ac.manchester.tornado.api.annotations.Reduce;
|
||||||
|
import uk.ac.manchester.tornado.api.enums.DataTransferMode;
|
||||||
|
|
||||||
final class SpecialisedDouble extends GenericTornadoVMStream<Double> {
|
final class SpecialisedDouble extends GenericTornadoVMStream<Double> {
|
||||||
|
|
||||||
@ -49,7 +52,7 @@ final class SpecialisedDouble extends GenericTornadoVMStream<Double> {
|
|||||||
private final double[] a, b, c;
|
private final double[] a, b, c;
|
||||||
private final double[] dotSum;
|
private final double[] dotSum;
|
||||||
|
|
||||||
@SuppressWarnings({"PrimitiveArrayArgumentToVarargsMethod", "DuplicatedCode"})
|
@SuppressWarnings({"DuplicatedCode"})
|
||||||
SpecialisedDouble(Config<Double> config) {
|
SpecialisedDouble(Config<Double> config) {
|
||||||
super(config);
|
super(config);
|
||||||
final int size = config.options.arraysize;
|
final int size = config.options.arraysize;
|
||||||
@ -58,12 +61,43 @@ final class SpecialisedDouble extends GenericTornadoVMStream<Double> {
|
|||||||
b = new double[size];
|
b = new double[size];
|
||||||
c = new double[size];
|
c = new double[size];
|
||||||
dotSum = new double[1];
|
dotSum = new double[1];
|
||||||
this.copyTask = mkSchedule().task("", SpecialisedDouble::copy, size, a, c);
|
this.copyTask =
|
||||||
this.mulTask = mkSchedule().task("", SpecialisedDouble::mul, size, b, c, scalar);
|
new TornadoExecutionPlan(
|
||||||
this.addTask = mkSchedule().task("", SpecialisedDouble::add, size, a, b, c);
|
new TaskGraph("copy")
|
||||||
this.triadTask = mkSchedule().task("", SpecialisedDouble::triad, size, a, b, c, scalar);
|
.task("copy", SpecialisedDouble::copy, size, a, c)
|
||||||
this.nstreamTask = mkSchedule().task("", SpecialisedDouble::nstream, size, a, b, c, scalar);
|
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, c)
|
||||||
this.dotTask = mkSchedule().task("", SpecialisedDouble::dot_, a, b, dotSum).streamOut(dotSum);
|
.snapshot());
|
||||||
|
this.mulTask =
|
||||||
|
new TornadoExecutionPlan(
|
||||||
|
new TaskGraph("mul")
|
||||||
|
.task("mul", SpecialisedDouble::mul, size, b, c, scalar)
|
||||||
|
.transferToDevice(DataTransferMode.FIRST_EXECUTION, b, c)
|
||||||
|
.snapshot());
|
||||||
|
this.addTask =
|
||||||
|
new TornadoExecutionPlan(
|
||||||
|
new TaskGraph("add")
|
||||||
|
.task("add", SpecialisedDouble::add, size, a, b, c)
|
||||||
|
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
|
||||||
|
.snapshot());
|
||||||
|
this.triadTask =
|
||||||
|
new TornadoExecutionPlan(
|
||||||
|
new TaskGraph("triad")
|
||||||
|
.task("triad", SpecialisedDouble::triad, size, a, b, c, scalar)
|
||||||
|
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
|
||||||
|
.snapshot());
|
||||||
|
this.nstreamTask =
|
||||||
|
new TornadoExecutionPlan(
|
||||||
|
new TaskGraph("nstream")
|
||||||
|
.task("nstream", SpecialisedDouble::nstream, size, a, b, c, scalar)
|
||||||
|
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
|
||||||
|
.snapshot());
|
||||||
|
this.dotTask =
|
||||||
|
new TornadoExecutionPlan(
|
||||||
|
new TaskGraph("dot")
|
||||||
|
.task("dot", SpecialisedDouble::dot_, a, b, dotSum)
|
||||||
|
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b)
|
||||||
|
.transferToHost(DataTransferMode.EVERY_EXECUTION, new Object[] {dotSum})
|
||||||
|
.snapshot());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -72,7 +106,7 @@ final class SpecialisedDouble extends GenericTornadoVMStream<Double> {
|
|||||||
Arrays.fill(a, config.initA);
|
Arrays.fill(a, config.initA);
|
||||||
Arrays.fill(b, config.initB);
|
Arrays.fill(b, config.initB);
|
||||||
Arrays.fill(c, config.initC);
|
Arrays.fill(c, config.initC);
|
||||||
TornadoVMStreams.xferToDevice(device, a, b, c);
|
TornadoVMStreams.allocAndXferToDevice(device, a, b, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -81,7 +115,7 @@ final class SpecialisedDouble extends GenericTornadoVMStream<Double> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Data<Double> data() {
|
public Data<Double> readArrays() {
|
||||||
TornadoVMStreams.xferFromDevice(device, a, b, c);
|
TornadoVMStreams.xferFromDevice(device, a, b, c);
|
||||||
return new Data<>(boxed(a), boxed(b), boxed(c));
|
return new Data<>(boxed(a), boxed(b), boxed(c));
|
||||||
}
|
}
|
||||||
|
|||||||
@ -2,8 +2,11 @@ package javastream.tornadovm;
|
|||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import javastream.Main.Config;
|
import javastream.Main.Config;
|
||||||
|
import uk.ac.manchester.tornado.api.TaskGraph;
|
||||||
|
import uk.ac.manchester.tornado.api.TornadoExecutionPlan;
|
||||||
import uk.ac.manchester.tornado.api.annotations.Parallel;
|
import uk.ac.manchester.tornado.api.annotations.Parallel;
|
||||||
import uk.ac.manchester.tornado.api.annotations.Reduce;
|
import uk.ac.manchester.tornado.api.annotations.Reduce;
|
||||||
|
import uk.ac.manchester.tornado.api.enums.DataTransferMode;
|
||||||
|
|
||||||
final class SpecialisedFloat extends GenericTornadoVMStream<Float> {
|
final class SpecialisedFloat extends GenericTornadoVMStream<Float> {
|
||||||
|
|
||||||
@ -49,7 +52,7 @@ final class SpecialisedFloat extends GenericTornadoVMStream<Float> {
|
|||||||
private final float[] a, b, c;
|
private final float[] a, b, c;
|
||||||
private final float[] dotSum;
|
private final float[] dotSum;
|
||||||
|
|
||||||
@SuppressWarnings({"PrimitiveArrayArgumentToVarargsMethod", "DuplicatedCode"})
|
@SuppressWarnings({"DuplicatedCode"})
|
||||||
SpecialisedFloat(Config<Float> config) {
|
SpecialisedFloat(Config<Float> config) {
|
||||||
super(config);
|
super(config);
|
||||||
final int size = config.options.arraysize;
|
final int size = config.options.arraysize;
|
||||||
@ -58,12 +61,43 @@ final class SpecialisedFloat extends GenericTornadoVMStream<Float> {
|
|||||||
b = new float[size];
|
b = new float[size];
|
||||||
c = new float[size];
|
c = new float[size];
|
||||||
dotSum = new float[1];
|
dotSum = new float[1];
|
||||||
this.copyTask = mkSchedule().task("", SpecialisedFloat::copy, size, a, c);
|
this.copyTask =
|
||||||
this.mulTask = mkSchedule().task("", SpecialisedFloat::mul, size, b, c, scalar);
|
new TornadoExecutionPlan(
|
||||||
this.addTask = mkSchedule().task("", SpecialisedFloat::add, size, a, b, c);
|
new TaskGraph("copy")
|
||||||
this.triadTask = mkSchedule().task("", SpecialisedFloat::triad, size, a, b, c, scalar);
|
.task("copy", SpecialisedFloat::copy, size, a, c)
|
||||||
this.nstreamTask = mkSchedule().task("", SpecialisedFloat::nstream, size, a, b, c, scalar);
|
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, c)
|
||||||
this.dotTask = mkSchedule().task("", SpecialisedFloat::dot_, a, b, dotSum).streamOut(dotSum);
|
.snapshot());
|
||||||
|
this.mulTask =
|
||||||
|
new TornadoExecutionPlan(
|
||||||
|
new TaskGraph("mul")
|
||||||
|
.task("mul", SpecialisedFloat::mul, size, b, c, scalar)
|
||||||
|
.transferToDevice(DataTransferMode.FIRST_EXECUTION, b, c)
|
||||||
|
.snapshot());
|
||||||
|
this.addTask =
|
||||||
|
new TornadoExecutionPlan(
|
||||||
|
new TaskGraph("add")
|
||||||
|
.task("add", SpecialisedFloat::add, size, a, b, c)
|
||||||
|
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
|
||||||
|
.snapshot());
|
||||||
|
this.triadTask =
|
||||||
|
new TornadoExecutionPlan(
|
||||||
|
new TaskGraph("triad")
|
||||||
|
.task("triad", SpecialisedFloat::triad, size, a, b, c, scalar)
|
||||||
|
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
|
||||||
|
.snapshot());
|
||||||
|
this.nstreamTask =
|
||||||
|
new TornadoExecutionPlan(
|
||||||
|
new TaskGraph("nstream")
|
||||||
|
.task("nstream", SpecialisedFloat::nstream, size, a, b, c, scalar)
|
||||||
|
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b, c)
|
||||||
|
.snapshot());
|
||||||
|
this.dotTask =
|
||||||
|
new TornadoExecutionPlan(
|
||||||
|
new TaskGraph("dot")
|
||||||
|
.task("dot", SpecialisedFloat::dot_, a, b, dotSum)
|
||||||
|
.transferToDevice(DataTransferMode.FIRST_EXECUTION, a, b)
|
||||||
|
.transferToHost(DataTransferMode.EVERY_EXECUTION, new Object[] {dotSum})
|
||||||
|
.snapshot());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -72,7 +106,7 @@ final class SpecialisedFloat extends GenericTornadoVMStream<Float> {
|
|||||||
Arrays.fill(a, config.initA);
|
Arrays.fill(a, config.initA);
|
||||||
Arrays.fill(b, config.initB);
|
Arrays.fill(b, config.initB);
|
||||||
Arrays.fill(c, config.initC);
|
Arrays.fill(c, config.initC);
|
||||||
TornadoVMStreams.xferToDevice(device, a, b, c);
|
TornadoVMStreams.allocAndXferToDevice(device, a, b, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -81,7 +115,7 @@ final class SpecialisedFloat extends GenericTornadoVMStream<Float> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Data<Float> data() {
|
public Data<Float> readArrays() {
|
||||||
TornadoVMStreams.xferFromDevice(device, a, b, c);
|
TornadoVMStreams.xferFromDevice(device, a, b, c);
|
||||||
return new Data<>(boxed(a), boxed(b), boxed(c));
|
return new Data<>(boxed(a), boxed(b), boxed(c));
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,36 +1,46 @@
|
|||||||
package javastream.tornadovm;
|
package javastream.tornadovm;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.IntStream;
|
import java.util.stream.IntStream;
|
||||||
import javastream.JavaStream;
|
import javastream.JavaStream;
|
||||||
import javastream.Main.Config;
|
import javastream.Main.Config;
|
||||||
import uk.ac.manchester.tornado.api.TornadoRuntimeCI;
|
import uk.ac.manchester.tornado.api.TornadoRuntimeInterface;
|
||||||
|
import uk.ac.manchester.tornado.api.common.Event;
|
||||||
import uk.ac.manchester.tornado.api.common.TornadoDevice;
|
import uk.ac.manchester.tornado.api.common.TornadoDevice;
|
||||||
import uk.ac.manchester.tornado.api.mm.TornadoGlobalObjectState;
|
import uk.ac.manchester.tornado.api.memory.TornadoDeviceObjectState;
|
||||||
|
import uk.ac.manchester.tornado.api.memory.TornadoGlobalObjectState;
|
||||||
import uk.ac.manchester.tornado.api.runtime.TornadoRuntime;
|
import uk.ac.manchester.tornado.api.runtime.TornadoRuntime;
|
||||||
|
|
||||||
public final class TornadoVMStreams {
|
public final class TornadoVMStreams {
|
||||||
|
|
||||||
private TornadoVMStreams() {}
|
private TornadoVMStreams() {}
|
||||||
|
|
||||||
static void xferToDevice(TornadoDevice device, Object... xs) {
|
static void allocAndXferToDevice(TornadoDevice device, Object... xs) {
|
||||||
for (Object x : xs) {
|
for (Object x : xs) {
|
||||||
TornadoGlobalObjectState state = TornadoRuntime.getTornadoRuntime().resolveObject(x);
|
TornadoGlobalObjectState state = TornadoRuntime.getTornadoRuntime().resolveObject(x);
|
||||||
|
device.allocateObjects(
|
||||||
|
new Object[] {x}, 0, new TornadoDeviceObjectState[] {state.getDeviceState(device)});
|
||||||
List<Integer> writeEvent = device.ensurePresent(x, state.getDeviceState(device), null, 0, 0);
|
List<Integer> writeEvent = device.ensurePresent(x, state.getDeviceState(device), null, 0, 0);
|
||||||
if (writeEvent != null) writeEvent.forEach(e -> device.resolveEvent(e).waitOn());
|
if (writeEvent != null) writeEvent.forEach(e -> device.resolveEvent(e).waitOn());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void xferFromDevice(TornadoDevice device, Object... xs) {
|
static void xferFromDevice(TornadoDevice device, Object... xs) {
|
||||||
for (Object x : xs) {
|
Arrays.stream(xs)
|
||||||
|
.map(
|
||||||
|
x -> {
|
||||||
TornadoGlobalObjectState state = TornadoRuntime.getTornadoRuntime().resolveObject(x);
|
TornadoGlobalObjectState state = TornadoRuntime.getTornadoRuntime().resolveObject(x);
|
||||||
device.resolveEvent(device.streamOut(x, 0, state.getDeviceState(device), null)).waitOn();
|
return device.resolveEvent(
|
||||||
}
|
device.streamOut(x, 0, state.getDeviceState(device), null));
|
||||||
|
})
|
||||||
|
.collect(Collectors.toList())
|
||||||
|
.forEach(Event::waitOn);
|
||||||
}
|
}
|
||||||
|
|
||||||
static List<TornadoDevice> enumerateDevices(TornadoRuntimeCI runtime) {
|
static List<TornadoDevice> enumerateDevices(TornadoRuntimeInterface runtime) {
|
||||||
return IntStream.range(0, runtime.getNumDrivers())
|
return IntStream.range(0, runtime.getNumDrivers())
|
||||||
.mapToObj(runtime::getDriver)
|
.mapToObj(runtime::getDriver)
|
||||||
.flatMap(d -> IntStream.range(0, d.getDeviceCount()).mapToObj(d::getDevice))
|
.flatMap(d -> IntStream.range(0, d.getDeviceCount()).mapToObj(d::getDevice))
|
||||||
|
|||||||
@ -20,6 +20,18 @@ end
|
|||||||
|
|
||||||
@enum Benchmark All Triad Nstream
|
@enum Benchmark All Triad Nstream
|
||||||
|
|
||||||
|
|
||||||
|
function run_init_arrays!(data::StreamData{T,C}, context, init::Tuple{T,T,T})::Float64 where {T,C}
|
||||||
|
return @elapsed init_arrays!(data, context, init)
|
||||||
|
end
|
||||||
|
|
||||||
|
function run_read_data(data::StreamData{T,C}, context)::Tuple{Float64,VectorData{T}} where {T,C}
|
||||||
|
elapsed = @elapsed begin
|
||||||
|
result = read_data(data, context)
|
||||||
|
end
|
||||||
|
return (elapsed, result)
|
||||||
|
end
|
||||||
|
|
||||||
function run_all!(data::StreamData{T,C}, context, times::Int)::Tuple{Timings,T} where {T,C}
|
function run_all!(data::StreamData{T,C}, context, times::Int)::Tuple{Timings,T} where {T,C}
|
||||||
timings = Timings(times)
|
timings = Timings(times)
|
||||||
lastSum::T = 0
|
lastSum::T = 0
|
||||||
@ -39,11 +51,7 @@ function run_triad!(data::StreamData{T,C}, context, times::Int)::Float64 where {
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
function run_nstream!(
|
function run_nstream!(data::StreamData{T,C}, context, times::Int)::Vector{Float64} where {T,C}
|
||||||
data::StreamData{T,C},
|
|
||||||
context,
|
|
||||||
times::Int,
|
|
||||||
)::Vector{Float64} where {T,C}
|
|
||||||
timings::Vector{Float64} = zeros(times)
|
timings::Vector{Float64} = zeros(times)
|
||||||
for i = 1:times
|
for i = 1:times
|
||||||
@inbounds timings[i] = @elapsed nstream!(data, context)
|
@inbounds timings[i] = @elapsed nstream!(data, context)
|
||||||
@ -93,9 +101,7 @@ function check_solutions(
|
|||||||
error = abs((dot - gold_sum) / gold_sum)
|
error = abs((dot - gold_sum) / gold_sum)
|
||||||
failed = error > 1.0e-8
|
failed = error > 1.0e-8
|
||||||
if failed
|
if failed
|
||||||
println(
|
println("Validation failed on sum. Error $error \nSum was $dot but should be $gold_sum")
|
||||||
"Validation failed on sum. Error $error \nSum was $dot but should be $gold_sum",
|
|
||||||
)
|
|
||||||
end
|
end
|
||||||
!failed
|
!failed
|
||||||
end : true
|
end : true
|
||||||
@ -166,7 +172,7 @@ function main()
|
|||||||
parse_options(config)
|
parse_options(config)
|
||||||
|
|
||||||
if config.list
|
if config.list
|
||||||
for (i, (_,repr, impl)) in enumerate(devices())
|
for (i, (_, repr, impl)) in enumerate(devices())
|
||||||
println("[$i] ($impl) $repr")
|
println("[$i] ($impl) $repr")
|
||||||
end
|
end
|
||||||
exit(0)
|
exit(0)
|
||||||
@ -175,9 +181,7 @@ function main()
|
|||||||
ds = devices()
|
ds = devices()
|
||||||
# TODO implement substring device match
|
# TODO implement substring device match
|
||||||
if config.device < 1 || config.device > length(ds)
|
if config.device < 1 || config.device > length(ds)
|
||||||
error(
|
error("Device $(config.device) out of range (1..$(length(ds))), NOTE: Julia is 1-indexed")
|
||||||
"Device $(config.device) out of range (1..$(length(ds))), NOTE: Julia is 1-indexed",
|
|
||||||
)
|
|
||||||
else
|
else
|
||||||
device = ds[config.device]
|
device = ds[config.device]
|
||||||
end
|
end
|
||||||
@ -257,16 +261,42 @@ function main()
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Reports the one-off array initialisation and read-back timings.
#
# `init` and `read` are the elapsed times in seconds for the two phases.
# Each phase is accounted as moving `3 * array_bytes` bytes. Output goes
# through `tabulate` as one row per phase when `config.csv` is set, otherwise
# one human-readable line per phase is printed.
#
# NOTE(review): the "sizeof" column carries the per-phase byte total (x[3]),
# not the element size — confirm this is what CSV consumers expect.
function show_init(init::Float64, read::Float64)
  setup = [("Init", init, 3 * array_bytes), ("Read", read, 3 * array_bytes)]
  if config.csv
    tabulate(
      map(
        x -> [
          ("phase", x[1]),
          ("n_elements", config.arraysize),
          ("sizeof", x[3]),
          # Bandwidth must use this row's own byte count (x[3]); `total_bytes`
          # is not bound inside this lambda.
          ("max_m$(config.mibibytes ? "i" : "")bytes_per_sec", mega_scale * x[3] / x[2]),
          ("runtime", x[2]),
        ],
        setup,
      )...,
    )
  else
    for (name, elapsed, total_bytes) in setup
      println(
        "$name: $(round(elapsed; digits=5)) s (=$(round(( mega_scale * total_bytes) / elapsed; digits = 5)) M$(config.mibibytes ? "i" : "")Bytes/sec)",
      )
    end
  end
end
|
||||||
|
|
||||||
init::Tuple{type,type,type} = DefaultInit
|
init::Tuple{type,type,type} = DefaultInit
|
||||||
scalar::type = DefaultScalar
|
scalar::type = DefaultScalar
|
||||||
|
|
||||||
GC.enable(false)
|
GC.enable(false)
|
||||||
|
|
||||||
(data, context) = make_stream(config.arraysize, scalar, device, config.csv)
|
(data, context) = make_stream(config.arraysize, scalar, device, config.csv)
|
||||||
init_arrays!(data, context, init)
|
tInit = run_init_arrays!(data, context, init)
|
||||||
if benchmark == All
|
if benchmark == All
|
||||||
(timings, sum) = run_all!(data, context, config.numtimes)
|
(timings, sum) = run_all!(data, context, config.numtimes)
|
||||||
valid = check_solutions(read_data(data, context), config.numtimes, init, benchmark, sum)
|
(tRead, result) = run_read_data(data, context)
|
||||||
|
show_init(tInit, tRead)
|
||||||
|
valid = check_solutions(result, config.numtimes, init, benchmark, sum)
|
||||||
tabulate(
|
tabulate(
|
||||||
mk_row(timings.copy, "Copy", 2 * array_bytes),
|
mk_row(timings.copy, "Copy", 2 * array_bytes),
|
||||||
mk_row(timings.mul, "Mul", 2 * array_bytes),
|
mk_row(timings.mul, "Mul", 2 * array_bytes),
|
||||||
@ -276,13 +306,15 @@ function main()
|
|||||||
)
|
)
|
||||||
elseif benchmark == Nstream
|
elseif benchmark == Nstream
|
||||||
timings = run_nstream!(data, context, config.numtimes)
|
timings = run_nstream!(data, context, config.numtimes)
|
||||||
valid =
|
(tRead, result) = run_read_data(data, context)
|
||||||
check_solutions(read_data(data, context), config.numtimes, init, benchmark, nothing)
|
show_init(tInit, tRead)
|
||||||
|
valid = check_solutions(result, config.numtimes, init, benchmark, nothing)
|
||||||
tabulate(mk_row(timings, "Nstream", 4 * array_bytes))
|
tabulate(mk_row(timings, "Nstream", 4 * array_bytes))
|
||||||
elseif benchmark == Triad
|
elseif benchmark == Triad
|
||||||
elapsed = run_triad!(data, context, config.numtimes)
|
elapsed = run_triad!(data, context, config.numtimes)
|
||||||
valid =
|
(tRead, result) = run_read_data(data, context)
|
||||||
check_solutions(read_data(data, context), config.numtimes, init, benchmark, nothing)
|
show_init(tInit, tRead)
|
||||||
|
valid = check_solutions(result, config.numtimes, init, benchmark, nothing)
|
||||||
total_bytes = 3 * array_bytes * config.numtimes
|
total_bytes = 3 * array_bytes * config.numtimes
|
||||||
bandwidth = mega_scale * (total_bytes / elapsed)
|
bandwidth = mega_scale * (total_bytes / elapsed)
|
||||||
println("Runtime (seconds): $(round(elapsed; digits=5))")
|
println("Runtime (seconds): $(round(elapsed; digits=5))")
|
||||||
@ -290,7 +322,6 @@ function main()
|
|||||||
else
|
else
|
||||||
error("Bad benchmark $(benchmark)")
|
error("Bad benchmark $(benchmark)")
|
||||||
end
|
end
|
||||||
|
|
||||||
GC.enable(true)
|
GC.enable(true)
|
||||||
|
|
||||||
if !valid
|
if !valid
|
||||||
|
|||||||
49
src/main.cpp
49
src/main.cpp
@ -306,7 +306,9 @@ void run()
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
auto init1 = std::chrono::high_resolution_clock::now();
|
||||||
stream->init_arrays(startA, startB, startC);
|
stream->init_arrays(startA, startB, startC);
|
||||||
|
auto init2 = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
// Result of the Dot kernel, if used.
|
// Result of the Dot kernel, if used.
|
||||||
T sum{};
|
T sum{};
|
||||||
@ -333,7 +335,54 @@ void run()
|
|||||||
std::vector<T> c(ARRAY_SIZE);
|
std::vector<T> c(ARRAY_SIZE);
|
||||||
|
|
||||||
|
|
||||||
|
auto read1 = std::chrono::high_resolution_clock::now();
|
||||||
stream->read_arrays(a, b, c);
|
stream->read_arrays(a, b, c);
|
||||||
|
auto read2 = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
auto initElapsedS = std::chrono::duration_cast<std::chrono::duration<double>>(read2 - read1).count();
|
||||||
|
auto readElapsedS = std::chrono::duration_cast<std::chrono::duration<double>>(init2 - init1).count();
|
||||||
|
auto initBWps = ((mibibytes ? std::pow(2.0, -20.0) : 1.0E-6) * (3 * sizeof(T) * ARRAY_SIZE)) / initElapsedS;
|
||||||
|
auto readBWps = ((mibibytes ? std::pow(2.0, -20.0) : 1.0E-6) * (3 * sizeof(T) * ARRAY_SIZE)) / readElapsedS;
|
||||||
|
|
||||||
|
if (output_as_csv)
|
||||||
|
{
|
||||||
|
std::cout
|
||||||
|
<< "phase" << csv_separator
|
||||||
|
<< "n_elements" << csv_separator
|
||||||
|
<< "sizeof" << csv_separator
|
||||||
|
<< ((mibibytes) ? "max_mibytes_per_sec" : "max_mbytes_per_sec") << csv_separator
|
||||||
|
<< "runtime" << std::endl;
|
||||||
|
std::cout
|
||||||
|
<< "Init" << csv_separator
|
||||||
|
<< ARRAY_SIZE << csv_separator
|
||||||
|
<< sizeof(T) << csv_separator
|
||||||
|
<< initBWps << csv_separator
|
||||||
|
<< initElapsedS << std::endl;
|
||||||
|
std::cout
|
||||||
|
<< "Read" << csv_separator
|
||||||
|
<< ARRAY_SIZE << csv_separator
|
||||||
|
<< sizeof(T) << csv_separator
|
||||||
|
<< readBWps << csv_separator
|
||||||
|
<< readElapsedS << std::endl;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::cout << "Init: "
|
||||||
|
<< std::setw(7)
|
||||||
|
<< initElapsedS
|
||||||
|
<< " s (="
|
||||||
|
<< initBWps
|
||||||
|
<< (mibibytes ? " MiBytes/sec" : " MBytes/sec")
|
||||||
|
<< ")" << std::endl;
|
||||||
|
std::cout << "Read: "
|
||||||
|
<< std::setw(7)
|
||||||
|
<< readElapsedS
|
||||||
|
<< " s (="
|
||||||
|
<< readBWps
|
||||||
|
<< (mibibytes ? " MiBytes/sec" : " MBytes/sec")
|
||||||
|
<< ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
check_solution<T>(num_times, a, b, c, sum);
|
check_solution<T>(num_times, a, b, c, sum);
|
||||||
|
|
||||||
// Display timing results
|
// Display timing results
|
||||||
|
|||||||
@ -54,7 +54,7 @@ use_field_init_shorthand = false
|
|||||||
force_explicit_abi = true
|
force_explicit_abi = true
|
||||||
condense_wildcard_suffixes = false
|
condense_wildcard_suffixes = false
|
||||||
color = "Auto"
|
color = "Auto"
|
||||||
required_version = "1.4.38"
|
required_version = "1.6.0"
|
||||||
unstable_features = false
|
unstable_features = false
|
||||||
disable_all_formatting = false
|
disable_all_formatting = false
|
||||||
skip_children = false
|
skip_children = false
|
||||||
|
|||||||
@ -174,7 +174,7 @@ where StreamData<T, D, A>: RustStream<T> {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
stream.init_arrays();
|
let init = stream.run_init_arrays();
|
||||||
|
|
||||||
let tabulate = |xs: &Vec<Duration>, name: &str, t_size: usize| -> Vec<(&str, String)> {
|
let tabulate = |xs: &Vec<Duration>, name: &str, t_size: usize| -> Vec<(&str, String)> {
|
||||||
let tail = &xs[1..]; // tail only
|
let tail = &xs[1..]; // tail only
|
||||||
@ -235,10 +235,47 @@ where StreamData<T, D, A>: RustStream<T> {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Reports the one-off initialisation and read-back phases: as rows handed to
// `tabulate_all` when CSV output is requested, otherwise as one printed line
// per phase.
let show_setup = |init: Duration, read: Duration| {
  // Each phase is accounted as touching all three arrays once.
  let phase_bytes = 3 * array_bytes;
  let phases = vec![
    ("Init", init.as_secs_f64(), phase_bytes),
    ("Read", read.as_secs_f64(), phase_bytes),
  ];

  if !option.csv {
    let unit = if option.mibibytes { "MiBytes/sec" } else { "MBytes/sec" };
    for (phase, secs, bytes) in phases {
      let bandwidth = mega_scale * (bytes as f64) / secs;
      println!("{}: {:.5} s (={:.5} {})", phase, secs, bandwidth, unit);
    }
  } else {
    let bandwidth_key =
      if option.mibibytes { "max_mibytes_per_sec" } else { "max_mbytes_per_sec" };
    let mut rows = Vec::with_capacity(phases.len());
    for (phase, secs, bytes) in &phases {
      let bandwidth = mega_scale * (*bytes as f64) / secs;
      rows.push(vec![
        ("phase", phase.to_string()),
        ("n_elements", option.arraysize.to_string()),
        ("sizeof", bytes.to_string()),
        (bandwidth_key, bandwidth.to_string()),
        ("runtime", secs.to_string()),
      ]);
    }
    tabulate_all(rows);
  }
};
|
||||||
|
|
||||||
let solutions_correct = match benchmark {
|
let solutions_correct = match benchmark {
|
||||||
Benchmark::All => {
|
Benchmark::All => {
|
||||||
let (results, sum) = stream.run_all(option.numtimes);
|
let (results, sum) = stream.run_all(option.numtimes);
|
||||||
stream.read_arrays();
|
let read = stream.run_read_arrays();
|
||||||
|
show_setup(init, read);
|
||||||
let correct = check_solution(benchmark, option.numtimes, &stream, Some(sum));
|
let correct = check_solution(benchmark, option.numtimes, &stream, Some(sum));
|
||||||
tabulate_all(vec![
|
tabulate_all(vec![
|
||||||
tabulate(&results.copy, "Copy", 2 * array_bytes),
|
tabulate(&results.copy, "Copy", 2 * array_bytes),
|
||||||
@ -251,14 +288,16 @@ where StreamData<T, D, A>: RustStream<T> {
|
|||||||
}
|
}
|
||||||
Benchmark::NStream => {
|
Benchmark::NStream => {
|
||||||
let results = stream.run_nstream(option.numtimes);
|
let results = stream.run_nstream(option.numtimes);
|
||||||
stream.read_arrays();
|
let read = stream.run_read_arrays();
|
||||||
|
show_setup(init, read);
|
||||||
let correct = check_solution(benchmark, option.numtimes, &stream, None);
|
let correct = check_solution(benchmark, option.numtimes, &stream, None);
|
||||||
tabulate_all(vec![tabulate(&results, "Nstream", 4 * array_bytes)]);
|
tabulate_all(vec![tabulate(&results, "Nstream", 4 * array_bytes)]);
|
||||||
correct
|
correct
|
||||||
}
|
}
|
||||||
Benchmark::Triad => {
|
Benchmark::Triad => {
|
||||||
let results = stream.run_triad(option.numtimes);
|
let results = stream.run_triad(option.numtimes);
|
||||||
stream.read_arrays();
|
let read = stream.run_read_arrays();
|
||||||
|
show_setup(init, read);
|
||||||
let correct = check_solution(benchmark, option.numtimes, &stream, None);
|
let correct = check_solution(benchmark, option.numtimes, &stream, None);
|
||||||
let total_bytes = 3 * array_bytes * option.numtimes;
|
let total_bytes = 3 * array_bytes * option.numtimes;
|
||||||
let bandwidth = giga_scale * (total_bytes as f64 / results.as_secs_f64());
|
let bandwidth = giga_scale * (total_bytes as f64 / results.as_secs_f64());
|
||||||
|
|||||||
@ -132,6 +132,18 @@ pub trait RustStream<T: Default> {
|
|||||||
fn nstream(&mut self);
|
fn nstream(&mut self);
|
||||||
fn dot(&mut self) -> T;
|
fn dot(&mut self) -> T;
|
||||||
|
|
||||||
|
fn run_init_arrays(&mut self) -> Duration {
|
||||||
|
timed(|| {
|
||||||
|
self.init_arrays();
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run_read_arrays(&mut self) -> Duration {
|
||||||
|
timed(|| {
|
||||||
|
self.read_arrays();
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
fn run_all(&mut self, n: usize) -> (AllTiming<Vec<Duration>>, T) {
|
fn run_all(&mut self, n: usize) -> (AllTiming<Vec<Duration>>, T) {
|
||||||
let mut timings: AllTiming<Vec<Duration>> = AllTiming {
|
let mut timings: AllTiming<Vec<Duration>> = AllTiming {
|
||||||
copy: vec![Duration::default(); n],
|
copy: vec![Duration::default(); n],
|
||||||
|
|||||||
@ -1 +0,0 @@
|
|||||||
{"name":"sbt","version":"1.5.2","bspVersion":"2.0.0-M5","languages":["scala"],"argv":["/usr/lib/jvm/java-11-openjdk-11.0.11.0.9-2.fc33.x86_64/bin/java","-Xms100m","-Xmx100m","-classpath","/home/tom/.local/share/JetBrains/Toolbox/apps/IDEA-U/ch-0/211.7142.45.plugins/Scala/launcher/sbt-launch.jar","xsbt.boot.Boot","-bsp","--sbt-launch-jar=/home/tom/.local/share/JetBrains/Toolbox/apps/IDEA-U/ch-0/211.7142.45.plugins/Scala/launcher/sbt-launch.jar"]}
|
|
||||||
1
src/scala/scala-stream/.gitignore
vendored
1
src/scala/scala-stream/.gitignore
vendored
@ -1 +1,2 @@
|
|||||||
target/
|
target/
|
||||||
|
.bsp/
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
version = "3.0.0-RC2"
|
version = "3.7.14"
|
||||||
runner.dialect = scala3
|
runner.dialect = scala3
|
||||||
|
|
||||||
style = defaultWithAlign
|
style = defaultWithAlign
|
||||||
|
|||||||
@ -3,7 +3,7 @@ lazy val mainCls = Some("scalastream.App")
|
|||||||
lazy val root = (project in file("."))
|
lazy val root = (project in file("."))
|
||||||
.enablePlugins(NativeImagePlugin)
|
.enablePlugins(NativeImagePlugin)
|
||||||
.settings(
|
.settings(
|
||||||
scalaVersion := "3.0.0",
|
scalaVersion := "3.3.1",
|
||||||
version := "4.0",
|
version := "4.0",
|
||||||
organization := "uk.ac.bristol.uob-hpc",
|
organization := "uk.ac.bristol.uob-hpc",
|
||||||
organizationName := "University of Bristol",
|
organizationName := "University of Bristol",
|
||||||
@ -11,6 +11,11 @@ lazy val root = (project in file("."))
|
|||||||
assembly / mainClass := mainCls,
|
assembly / mainClass := mainCls,
|
||||||
scalacOptions ~= filterConsoleScalacOptions,
|
scalacOptions ~= filterConsoleScalacOptions,
|
||||||
assembly / assemblyJarName := "scala-stream.jar",
|
assembly / assemblyJarName := "scala-stream.jar",
|
||||||
|
assembly / assemblyMergeStrategy := {
|
||||||
|
case PathList("module-info.class") => MergeStrategy.discard
|
||||||
|
case PathList("META-INF", "versions", xs @ _, "module-info.class") => MergeStrategy.discard
|
||||||
|
case x => (ThisBuild / assemblyMergeStrategy).value(x)
|
||||||
|
},
|
||||||
nativeImageOptions := Seq(
|
nativeImageOptions := Seq(
|
||||||
"--no-fallback",
|
"--no-fallback",
|
||||||
"-H:ReflectionConfigurationFiles=../../reflect-config.json"
|
"-H:ReflectionConfigurationFiles=../../reflect-config.json"
|
||||||
@ -22,8 +27,8 @@ lazy val root = (project in file("."))
|
|||||||
// Lazy val implementation in Scala 3 triggers an exception in nativeImage, use 2_13 for arg parsing for now otherwise we can't get to the benchmarking part
|
// Lazy val implementation in Scala 3 triggers an exception in nativeImage, use 2_13 for arg parsing for now otherwise we can't get to the benchmarking part
|
||||||
("com.github.scopt" %% "scopt" % "4.0.1").cross(CrossVersion.for3Use2_13),
|
("com.github.scopt" %% "scopt" % "4.0.1").cross(CrossVersion.for3Use2_13),
|
||||||
// par also uses lazy val at some point, so it doesn't work in nativeImage
|
// par also uses lazy val at some point, so it doesn't work in nativeImage
|
||||||
"org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.3",
|
"org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.4",
|
||||||
"net.openhft" % "affinity" % "3.21ea1",
|
"net.openhft" % "affinity" % "3.23.2",
|
||||||
"org.slf4j" % "slf4j-simple" % "1.7.30" // for affinity
|
"org.slf4j" % "slf4j-simple" % "2.0.5" // for affinity
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
sbt.version=1.5.2
|
sbt.version=1.9.2
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
addSbtPlugin("com.timushev.sbt" % "sbt-updates" % "0.5.3")
|
addSbtPlugin("com.timushev.sbt" % "sbt-updates" % "0.5.3")
|
||||||
addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.1.17")
|
addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.1.20")
|
||||||
addSbtPlugin("org.scalameta" % "sbt-native-image" % "0.3.0")
|
addSbtPlugin("org.scalameta" % "sbt-native-image" % "0.3.0")
|
||||||
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.15.0")
|
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.3")
|
||||||
addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.9.27")
|
addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.9.27")
|
||||||
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.2")
|
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.3")
|
||||||
|
|||||||
@ -14,6 +14,7 @@ transparent trait ScalaStream[@specialized(Float, Double) A]:
|
|||||||
def config: Config[A]
|
def config: Config[A]
|
||||||
|
|
||||||
def initArrays(): Unit
|
def initArrays(): Unit
|
||||||
|
def readArrays(): Unit = ()
|
||||||
def copy(): Unit
|
def copy(): Unit
|
||||||
def mul(): Unit
|
def mul(): Unit
|
||||||
def add(): Unit
|
def add(): Unit
|
||||||
@ -27,6 +28,8 @@ transparent trait ScalaStream[@specialized(Float, Double) A]:
|
|||||||
val end = System.nanoTime()
|
val end = System.nanoTime()
|
||||||
FiniteDuration(end - start, TimeUnit.NANOSECONDS) -> r
|
FiniteDuration(end - start, TimeUnit.NANOSECONDS) -> r
|
||||||
|
|
||||||
|
inline def runInitArrays(): FiniteDuration = timed(initArrays())._1
|
||||||
|
inline def runReadArrays(): FiniteDuration = timed(readArrays())._1
|
||||||
inline def runAll(times: Int)(using Fractional[A]): (Timings[Vector[FiniteDuration]], A) =
|
inline def runAll(times: Int)(using Fractional[A]): (Timings[Vector[FiniteDuration]], A) =
|
||||||
val copy = ArrayBuffer.fill[FiniteDuration](times)(Duration.Zero)
|
val copy = ArrayBuffer.fill[FiniteDuration](times)(Duration.Zero)
|
||||||
val mul = ArrayBuffer.fill[FiniteDuration](times)(Duration.Zero)
|
val mul = ArrayBuffer.fill[FiniteDuration](times)(Duration.Zero)
|
||||||
@ -62,7 +65,6 @@ transparent trait ScalaStream[@specialized(Float, Double) A]:
|
|||||||
|
|
||||||
def data(): Data[A]
|
def data(): Data[A]
|
||||||
|
|
||||||
|
|
||||||
trait Fractional[@specialized(Double, Float) A]:
|
trait Fractional[@specialized(Double, Float) A]:
|
||||||
def toFractional(f: Float): A
|
def toFractional(f: Float): A
|
||||||
def toFractional(f: Double): A
|
def toFractional(f: Double): A
|
||||||
@ -204,7 +206,7 @@ object App:
|
|||||||
validateXs("c", vec.c, goldC)
|
validateXs("c", vec.c, goldC)
|
||||||
|
|
||||||
dotSum.foreach { sum =>
|
dotSum.foreach { sum =>
|
||||||
val goldSum = (goldA * goldB) * (config.options.arraysize).fractional
|
val goldSum = (goldA * goldB) * config.options.arraysize.fractional
|
||||||
val error = ((sum - goldSum) / goldSum).abs_
|
val error = ((sum - goldSum) / goldSum).abs_
|
||||||
if error > 1.fractional / 100000000.fractional then
|
if error > 1.fractional / 100000000.fractional then
|
||||||
Console.err.println(
|
Console.err.println(
|
||||||
@ -288,11 +290,38 @@ object App:
|
|||||||
println(header.map(_._1.padTo(padding, ' ')).mkString(sep))
|
println(header.map(_._1.padTo(padding, ' ')).mkString(sep))
|
||||||
println(rows.map(_.map(_._2.padTo(padding, ' ')).mkString(sep)).mkString("\n"))
|
println(rows.map(_.map(_._2.padTo(padding, ' ')).mkString(sep)).mkString("\n"))
|
||||||
|
|
||||||
|
/** Reports the one-off initialisation and read-back phases.
  *
  * Each phase is accounted as moving `3 * arrayBytes` bytes. With `opt.csv` set the two phases
  * are passed to `tabulate` as rows; otherwise one line per phase is printed.
  */
def showInit(init: FiniteDuration, read: FiniteDuration): Unit = {
  val phaseBytes = 3 * arrayBytes
  val phases     = Vector(("Init", init.seconds, phaseBytes), ("Read", read.seconds, phaseBytes))
  // "i" turns "mbytes"/"MBytes" into "mibytes"/"MiBytes" in the labels below.
  val mi = if opt.mibibytes then "i" else ""
  if opt.csv then {
    val rows = phases.map { case (phase, secs, bytes) =>
      Vector(
        "phase"                    -> phase,
        "n_elements"               -> opt.arraysize.toString,
        "sizeof"                   -> arrayBytes.toString,
        s"max_m${mi}bytes_per_sec" -> (megaScale * bytes.toDouble / secs).toString,
        "runtime"                  -> secs.toString
      )
    }
    tabulate(rows: _*)
  } else {
    phases.foreach { case (phase, secs, bytes) =>
      println(
        f"$phase: $secs%.5f s (=${megaScale * bytes.toDouble / secs}%.5f M${mi}Bytes/sec)"
      )
    }
  }
}
|
||||||
|
|
||||||
val stream = mkStream(config)
|
val stream = mkStream(config)
|
||||||
stream.initArrays()
|
val init = stream.runInitArrays()
|
||||||
config.benchmark match
|
config.benchmark match
|
||||||
case Benchmark.All =>
|
case Benchmark.All =>
|
||||||
val (results, sum) = stream.runAll(opt.numtimes)
|
val (results, sum) = stream.runAll(opt.numtimes)
|
||||||
|
val read = stream.runReadArrays()
|
||||||
|
showInit(init, read)
|
||||||
validate(stream.data(), config, Some(sum))
|
validate(stream.data(), config, Some(sum))
|
||||||
tabulate(
|
tabulate(
|
||||||
mkRow(results.copy, "Copy", 2 * arrayBytes),
|
mkRow(results.copy, "Copy", 2 * arrayBytes),
|
||||||
@ -303,10 +332,14 @@ object App:
|
|||||||
)
|
)
|
||||||
case Benchmark.NStream =>
|
case Benchmark.NStream =>
|
||||||
val result = stream.runNStream(opt.numtimes)
|
val result = stream.runNStream(opt.numtimes)
|
||||||
|
val read = stream.runReadArrays()
|
||||||
|
showInit(init, read)
|
||||||
validate(stream.data(), config)
|
validate(stream.data(), config)
|
||||||
tabulate(mkRow(result, "Nstream", 4 * arrayBytes))
|
tabulate(mkRow(result, "Nstream", 4 * arrayBytes))
|
||||||
case Benchmark.Triad =>
|
case Benchmark.Triad =>
|
||||||
val results = stream.runTriad(opt.numtimes)
|
val results = stream.runTriad(opt.numtimes)
|
||||||
|
val read = stream.runReadArrays()
|
||||||
|
showInit(init, read)
|
||||||
val totalBytes = 3 * arrayBytes * opt.numtimes
|
val totalBytes = 3 * arrayBytes * opt.numtimes
|
||||||
val bandwidth = megaScale * (totalBytes / results.seconds)
|
val bandwidth = megaScale * (totalBytes / results.seconds)
|
||||||
println(f"Runtime (seconds): ${results.seconds}%.5f")
|
println(f"Runtime (seconds): ${results.seconds}%.5f")
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user