Add integration tests and CI

Fix wrong nstream in plain_stream
This commit is contained in:
Tom Lin 2021-06-16 01:11:40 +01:00
parent fdb2c181cc
commit ce4d6cfbfb
7 changed files with 524 additions and 412 deletions

View File

@ -3,6 +3,21 @@ on: [push, pull_request]
jobs:
test-rust:
runs-on: ubuntu-18.04
defaults:
run:
working-directory: ./rust-stream
steps:
- uses: actions/checkout@v2
- name: Setup project
run: rustup install nightly
- name: Compile project
run: cargo +nightly build --release
- name: Test project
run: cargo +nightly test --release
- name: Test run project
run: ./target/release/rust-stream --arraysize 2048
test:
runs-on: ubuntu-18.04
steps:

56
rust-stream/Cargo.lock generated
View File

@ -326,6 +326,15 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "pest"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53"
dependencies = [
"ucd-trie",
]
[[package]]
name = "proc-macro-error"
version = "1.0.4"
@ -402,6 +411,19 @@ dependencies = [
"bitflags",
]
[[package]]
name = "rstest"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "041bb0202c14f6a158bbbf086afb03d0c6e975c2dec7d4912f8061ed44f290af"
dependencies = [
"cfg-if",
"proc-macro2",
"quote",
"rustc_version",
"syn",
]
[[package]]
name = "rust-stream"
version = "3.4.0"
@ -413,11 +435,21 @@ dependencies = [
"num-traits",
"num_cpus",
"rayon",
"rstest",
"rustversion",
"structopt",
"tabular",
]
[[package]]
name = "rustc_version"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0dfe2087c51c460008730de8b57e6a320782fbfb312e1f4d520e6c6fae155ee"
dependencies = [
"semver",
]
[[package]]
name = "rustversion"
version = "1.0.5"
@ -430,6 +462,24 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "semver"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6"
dependencies = [
"semver-parser",
]
[[package]]
name = "semver-parser"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7"
dependencies = [
"pest",
]
[[package]]
name = "signal-hook"
version = "0.1.17"
@ -515,6 +565,12 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "ucd-trie"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c"
[[package]]
name = "unicode-segmentation"
version = "1.7.1"

View File

@ -18,6 +18,9 @@ libc = "0.2.97"
core_affinity = "0.5.10"
colour = "0.6.0"
[dev-dependencies]
rstest = "0.10.0"
[build-dependencies]
rustversion = "1.0"

430
rust-stream/src/lib.rs Normal file
View File

@ -0,0 +1,430 @@
#![feature(allocator_api)]
#![feature(vec_into_raw_parts)]
use std::alloc::System;
use std::fmt::{Debug, Display};
use std::iter::Sum;
use std::mem::size_of;
use std::time::Duration;
use num_traits::abs;
use structopt::StructOpt;
use tabular::{Row, Table};
use crate::crossbeam_stream::ThreadedDevice;
use crate::plain_stream::SerialDevice;
use crate::rayon_stream::RayonDevice;
use crate::stream::{AllocatorType, ArrayType, RustStream, StreamData};
mod crossbeam_stream;
mod plain_stream;
mod rayon_stream;
mod stream;
// Command-line options of the benchmark driver; `run` parses them with `Options::from_iter`.
// NOTE: structopt turns the `///` comments on the fields into `--help` text, so they are
// effectively runtime strings. Maintainer notes are therefore written as plain `//` comments.
#[derive(Debug, StructOpt)]
struct Options {
/// List available devices
#[structopt(long)]
list: bool,
// Index into the device table built in `run`; an out-of-range index makes `run` return false.
/// Select device at <device>
#[structopt(long, default_value = "0")]
device: usize,
// `run` panics below 2; `run_cpu` drops the first timing sample before computing statistics.
/// Run the test <numtimes> times (NUM >= 2)
#[structopt(long, short = "n", default_value = "100")]
numtimes: usize,
/// Use <arraysize> elements in the array
#[structopt(long, short = "s", default_value = "33554432")]
arraysize: usize,
// Selects the f32 initial values instead of the f64 ones in `run`.
/// Use floats (rather than doubles)
#[structopt(long)]
float: bool,
// `triad_only` and `nstream_only` are mutually exclusive; `run_cpu` panics if both are set.
/// Only run triad
#[structopt(long)]
triad_only: bool,
/// Only run nstream
#[structopt(long)]
nstream_only: bool,
// Suppresses the informational banner lines and switches the result table to CSV.
/// Output as csv table
#[structopt(long)]
csv: bool,
// Switches the scale factors in `run_cpu` from 1e-6/1e-9 to 2^-20/2^-30.
/// Use MiB=2^20 for bandwidth calculation (default MB=10^6)
#[structopt(long)]
mibibytes: bool,
// `malloc` and `init` are forwarded unchanged to `StreamData::new_in`.
/// Use libc malloc instead of the Rust's allocator for benchmark array allocation
#[structopt(name = "malloc", long)]
malloc: bool,
/// Initialise each benchmark array at allocation time on the main thread
#[structopt(name = "init", long)]
init: bool,
// Forwarded to `ThreadedDevice::new` and `SerialDevice`; the Rayon device only warns about it.
/// Pin threads to distinct cores, this has NO effect in Rayon devices
#[structopt(long)]
pin: bool,
}
/// Which kernels a run executes and validates.
///
/// `PartialEq` is derived because `run_cpu` compares the selection against `Benchmark::Triad`.
#[derive(PartialEq)]
enum Benchmark {
/// Run every kernel via `run_all` (reported as Copy, Mul, Add, Triad and Dot).
All,
/// Run only the triad kernel via `run_triad`.
Triad,
/// Run only the nstream kernel via `run_nstream`.
NStream,
}
fn check_solution<T: ArrayType + Display + Sum + Into<f64>, D, A: AllocatorType>(
benchmark: Benchmark, numtimes: usize, vec: &StreamData<T, D, A>, dot_sum: Option<T>,
) -> bool {
let (mut gold_a, mut gold_b, mut gold_c) = vec.init;
for _ in 0..numtimes {
match benchmark {
Benchmark::All => {
gold_c = gold_a;
gold_b = vec.scalar * gold_c;
gold_c = gold_a + gold_b;
gold_a = gold_b + vec.scalar * gold_c;
}
Benchmark::Triad => {
gold_a = gold_b + vec.scalar * gold_c;
}
Benchmark::NStream => {
gold_a += gold_b + vec.scalar * gold_c;
}
};
}
let tolerance = T::epsilon().into() * 100.0f64;
let validate_xs = |name: &str, xs: &Vec<T, A>, from: T| {
let error = (xs.iter().map(|x| abs(*x - from)).sum::<T>()).into() / xs.len() as f64;
let fail = error > tolerance;
if fail {
eprintln!("Validation failed on {}[]. Average error {} ", name, error);
}
!fail
};
let a_ok = validate_xs("a", &vec.a, gold_a);
let b_ok = validate_xs("b", &vec.b, gold_b);
let c_ok = validate_xs("c", &vec.c, gold_c);
let dot_ok = dot_sum.map_or(true, |sum| {
let gold_sum = (gold_a * gold_b).into() * vec.size as f64;
let error = abs((sum.into() - gold_sum) / gold_sum);
let fail = error > 1.0e-8;
if fail {
eprintln!(
"Validation failed on sum. Error {} \nSum was {} but should be {}",
error, sum, gold_sum
);
}
!fail
});
a_ok && b_ok && c_ok && dot_ok
}
/// Runs the selected benchmark on `stream`, prints the timing results and validates the arrays.
///
/// * `option` - parsed command-line options; selects the benchmark (`triad_only` /
///   `nstream_only`), the output format (`csv`) and the bandwidth units (`mibibytes`).
/// * `stream` - the benchmark data and device. `init_arrays` is called before the run and
///   `clean_up` after it.
///
/// Unless `option.csv` is set, a short banner (benchmark, precision, array sizes) is printed
/// first. For the full suite and for nstream, one table row per kernel is printed, computed
/// from all timing samples except the first. Bandwidth is based on the minimum time and on the
/// bytes passed per kernel: 2 arrays for Copy/Mul/Dot, 3 for Add/Triad and 4 for Nstream.
/// Triad-only mode prints the total runtime and the aggregate bandwidth instead.
///
/// Returns the result of `check_solution` for the benchmark that ran.
///
/// # Panics
///
/// Panics if both `triad_only` and `nstream_only` are set, or if a kernel returns fewer than
/// two timing samples (the first sample is discarded and the rest must be non-empty).
fn run_cpu<T: ArrayType + Sync + Send + Sum + Into<f64> + Display, D, A: AllocatorType>(
    option: &Options, mut stream: StreamData<T, D, A>,
) -> bool
where
    StreamData<T, D, A>: RustStream<T>,
{
    let benchmark = match (option.nstream_only, option.triad_only) {
        (true, false) => Benchmark::NStream,
        (false, true) => Benchmark::Triad,
        (false, false) => Benchmark::All,
        (true, true) => {
            panic!("Both triad and nstream are enabled, pick one or omit both to run all benchmarks")
        }
    };

    let array_bytes = option.arraysize * size_of::<T>();
    let total_bytes = array_bytes * 3;
    let (mega_scale, mega_suffix, giga_scale, giga_suffix) = if !option.mibibytes {
        (1.0e-6, "MB", 1.0e-9, "GB")
    } else {
        (2f64.powi(-20), "MiB", 2f64.powi(-30), "GiB")
    };

    if !option.csv {
        println!(
            "Running {} {} times",
            match benchmark {
                Benchmark::All => "kernels",
                Benchmark::Triad => "triad",
                Benchmark::NStream => "nstream",
            },
            option.numtimes
        );
        if benchmark == Benchmark::Triad {
            println!("Number of elements: {}", option.arraysize);
        }
        println!("Precision: {}", if option.float { "float" } else { "double" });
        println!(
            "Array size: {:.1} {} (={:.1} {})",
            mega_scale * array_bytes as f64,
            mega_suffix,
            giga_scale * array_bytes as f64,
            giga_suffix
        );
        println!(
            "Total size: {:.1} {} (={:.1} {})",
            mega_scale * total_bytes as f64,
            mega_suffix,
            giga_scale * total_bytes as f64,
            giga_suffix
        );
    }

    stream.init_arrays();

    // Turns the timing samples of one kernel into (column name, value) pairs.
    // `t_size` is the number of bytes the kernel touches per iteration.
    let tabulate = |xs: &Vec<Duration>, name: &str, t_size: usize| -> Vec<(&str, String)> {
        // The first sample is excluded from the statistics (presumably a warm-up run).
        let tail = &xs[1..];
        let max = tail.iter().max().map(|d| d.as_secs_f64());
        let min = tail.iter().min().map(|d| d.as_secs_f64());
        match (min, max) {
            (Some(min), Some(max)) => {
                let avg: f64 =
                    tail.iter().map(|d| d.as_secs_f64()).sum::<f64>() / tail.len() as f64;
                // Peak bandwidth: bytes moved divided by the fastest run.
                let mbps = mega_scale * (t_size as f64) / min;
                if option.csv {
                    vec![
                        ("function", name.to_string()),
                        ("num_times", option.numtimes.to_string()),
                        ("n_elements", option.arraysize.to_string()),
                        ("sizeof", t_size.to_string()),
                        (
                            if option.mibibytes {
                                "max_mibytes_per_sec"
                            } else {
                                "max_mbytes_per_sec"
                            },
                            mbps.to_string(),
                        ),
                        ("min_runtime", min.to_string()),
                        ("max_runtime", max.to_string()),
                        ("avg_runtime", avg.to_string()),
                    ]
                } else {
                    vec![
                        ("Function", name.to_string()),
                        (
                            if option.mibibytes { "MiBytes/sec" } else { "MBytes/sec" },
                            format!("{:.3}", mbps),
                        ),
                        ("Min (sec)", format!("{:.5}", min)),
                        ("Max", format!("{:.5}", max)),
                        ("Average", format!("{:.5}", avg)),
                    ]
                }
            }
            (_, _) => panic!("No min/max element for {}(size={})", name, t_size),
        }
    };

    // Prints the rows produced by `tabulate`; the header comes from the first row's names.
    let tabulate_all = |xs: Vec<Vec<(&str, String)>>| {
        match xs.as_slice() {
            [head, ..] => {
                if option.csv {
                    println!("{}", head.iter().map(|(col, _)| *col).collect::<Vec<_>>().join(","));
                    for kvs in xs {
                        // Borrow the cells instead of cloning each String just to join them.
                        println!(
                            "{}",
                            kvs.iter().map(|(_, val)| val.as_str()).collect::<Vec<_>>().join(",")
                        );
                    }
                } else {
                    let mut table = Table::new(&vec!["{:<}"; head.len()].join(" "));
                    table.add_row(head.iter().fold(Row::new(), |row, (col, _)| row.with_cell(col)));
                    for kvs in xs {
                        table.add_row(
                            kvs.iter().fold(Row::new(), |row, (_, val)| row.with_cell(val)),
                        );
                    }
                    print!("{}", table);
                }
            }
            _ => panic!("Empty tabulation"),
        };
    };

    let solutions_correct = match benchmark {
        Benchmark::All => {
            let (results, sum) = stream.run_all(option.numtimes);
            let correct = check_solution(benchmark, option.numtimes, &stream, Some(sum));
            tabulate_all(vec![
                tabulate(&results.copy, "Copy", 2 * array_bytes),
                tabulate(&results.mul, "Mul", 2 * array_bytes),
                tabulate(&results.add, "Add", 3 * array_bytes),
                tabulate(&results.triad, "Triad", 3 * array_bytes),
                tabulate(&results.dot, "Dot", 2 * array_bytes),
            ]);
            correct
        }
        Benchmark::NStream => {
            let results = stream.run_nstream(option.numtimes);
            let correct = check_solution(benchmark, option.numtimes, &stream, None);
            tabulate_all(vec![tabulate(&results, "Nstream", 4 * array_bytes)]);
            correct
        }
        Benchmark::Triad => {
            let results = stream.run_triad(option.numtimes);
            let correct = check_solution(benchmark, option.numtimes, &stream, None);
            // Triad-only mode reports one aggregate figure over all iterations.
            let total_bytes = 3 * array_bytes * option.numtimes;
            let bandwidth = giga_scale * (total_bytes as f64 / results.as_secs_f64());
            println!("Runtime (seconds): {:.5}", results.as_secs_f64());
            println!("Bandwidth ({}/s): {:.3} ", giga_suffix, bandwidth);
            correct
        }
    };

    // Called for its effect only; the original wrapped the call in a no-op borrow.
    stream.clean_up();
    solutions_correct
}
/// Crate version captured at compile time; `None` when the build does not go through Cargo.
const VERSION: Option<&str> = option_env!("CARGO_PKG_VERSION");

// Initial array values and the kernel scalar, defined once in single precision.
const START_A: f32 = 0.1;
const START_B: f32 = 0.2;
const START_C: f32 = 0.0;
const START_SCALAR: f32 = 0.4;

/// Scalar multiplier for `f32` runs.
const FLOAT_INIT_SCALAR: f32 = START_SCALAR;
/// Initial `(a, b, c)` values for `f32` runs.
const FLOAT_INIT: (f32, f32, f32) = (START_A, START_B, START_C);

// The double-precision values are widened from the f32 constants, so they carry the f32
// rounding of 0.1/0.2/0.4 rather than the nearest f64 to those decimals.
/// Scalar multiplier for `f64` runs.
const DOUBLE_START_SCALAR: f64 = START_SCALAR as f64;
/// Initial `(a, b, c)` values for `f64` runs.
const DOUBLE_INIT: (f64, f64, f64) = (START_A as f64, START_B as f64, START_C as f64);
pub fn run(args: &Vec<String>) -> bool {
println!("`{:?}`", args);
let options: Options = Options::from_iter(args);
if options.numtimes < 2 {
panic!("numtimes must be >= 2")
}
let alloc = System;
let alloc_name = if options.malloc { "libc-malloc" } else { "rust-system" };
let rayon_device = &|| {
let dev = RayonDevice { pool: rayon::ThreadPoolBuilder::default().build().unwrap() };
if !options.csv {
println!("Using {} thread(s), alloc={}", dev.pool.current_num_threads(), alloc_name);
if options.pin {
colour::e_yellow_ln!("Pinning threads have no effect on Rayon!")
}
}
if options.float {
run_cpu(
&options,
StreamData::new_in(
options.arraysize,
FLOAT_INIT_SCALAR,
FLOAT_INIT,
dev,
alloc,
options.malloc,
options.init,
),
)
} else {
run_cpu(
&options,
StreamData::new_in(
options.arraysize,
DOUBLE_START_SCALAR,
DOUBLE_INIT,
dev,
alloc,
options.malloc,
options.init,
),
)
}
};
let crossbeam_device = &|| {
let ncores = num_cpus::get();
let dev = ThreadedDevice::new(ncores, options.pin);
if !options.csv {
println!("Using {} thread(s), pin={}, alloc={}", ncores, options.pin, alloc_name)
}
if options.float {
run_cpu(
&options,
StreamData::new_in(
options.arraysize,
FLOAT_INIT_SCALAR,
FLOAT_INIT,
dev,
alloc,
options.malloc,
options.init,
),
)
} else {
run_cpu(
&options,
StreamData::new_in(
options.arraysize,
DOUBLE_START_SCALAR,
DOUBLE_INIT,
dev,
alloc,
options.malloc,
options.init,
),
)
}
};
let st_device = &|| {
let dev = SerialDevice { pin: options.pin };
if !options.csv {
println!("Using 1 thread, pin={}, alloc={}", options.pin, alloc_name);
}
if options.float {
run_cpu(
&options,
StreamData::new_in(
options.arraysize,
FLOAT_INIT_SCALAR,
FLOAT_INIT,
dev,
alloc,
options.malloc,
options.init,
),
)
} else {
run_cpu(
&options,
StreamData::new_in(
options.arraysize,
DOUBLE_START_SCALAR,
DOUBLE_INIT,
dev,
alloc,
options.malloc,
options.init,
),
)
}
};
let devices: Vec<(String, &'_ dyn Fn() -> bool)> = vec![
("CPU (Rayon)".to_string(), rayon_device),
(format!("CPU (Crossbeam, pinning={})", options.pin), crossbeam_device),
("CPU (Single threaded)".to_string(), st_device),
];
if options.list {
devices.iter().enumerate().for_each(|(i, (name, _))| {
println!("[{}] {}", i, name);
});
true
} else {
match devices.get(options.device) {
Some((name, run)) => {
if !&options.csv {
println!(
"BabelStream\n\
Version: {}\n\
Implementation: Rust; {}",
VERSION.unwrap_or("unknown"),
name
);
if options.init {
println!("Initialising arrays on main thread");
}
}
run()
}
None => {
eprintln!("Device index {} not available", options.device);
false
}
}
}
}

View File

@ -1,414 +1,5 @@
#![feature(allocator_api)]
#![feature(vec_into_raw_parts)]
use std::alloc::System;
use std::fmt::{Debug, Display};
use std::iter::Sum;
use std::mem::size_of;
use std::time::Duration;
use num_traits::abs;
use structopt::StructOpt;
use tabular::{Row, Table};
use crate::crossbeam_stream::ThreadedDevice;
use crate::plain_stream::SerialDevice;
use crate::rayon_stream::RayonDevice;
use crate::stream::{AllocatorType, ArrayType, RustStream, StreamData};
mod crossbeam_stream;
mod plain_stream;
mod rayon_stream;
mod stream;
// Command-line options of the benchmark driver, parsed through structopt's derive.
// NOTE: structopt turns the `///` comments on the fields into `--help` text, so they are
// effectively runtime strings. Maintainer notes are therefore written as plain `//` comments.
#[derive(Debug, StructOpt)]
struct Options {
/// List available devices
#[structopt(long)]
list: bool,
/// Select device at <device>
#[structopt(long, default_value = "0")]
device: usize,
// `run_cpu` drops the first timing sample before computing statistics.
/// Run the test <numtimes> times (NUM >= 2)
#[structopt(long, short = "n", default_value = "100")]
numtimes: usize,
/// Use <arraysize> elements in the array
#[structopt(long, short = "s", default_value = "33554432")]
arraysize: usize,
/// Use floats (rather than doubles)
#[structopt(long)]
float: bool,
// `triad_only` and `nstream_only` are mutually exclusive; `run_cpu` panics if both are set.
/// Only run triad
#[structopt(long)]
triad_only: bool,
/// Only run nstream
#[structopt(long)]
nstream_only: bool,
// Suppresses the informational banner lines and switches the result table to CSV.
/// Output as csv table
#[structopt(long)]
csv: bool,
// Switches the scale factors in `run_cpu` from 1e-6/1e-9 to 2^-20/2^-30.
/// Use MiB=2^20 for bandwidth calculation (default MB=10^6)
#[structopt(long)]
mibibytes: bool,
/// Use libc malloc instead of the Rust's allocator for benchmark array allocation
#[structopt(name = "malloc", long)]
malloc: bool,
/// Initialise each benchmark array at allocation time on the main thread
#[structopt(name = "init", long)]
init: bool,
/// Pin threads to distinct cores, this has NO effect in Rayon devices
#[structopt(long)]
pin: bool,
}
/// Which kernels a run executes.
///
/// `PartialEq` is derived because `run_cpu` compares the selection against `Benchmark::Triad`.
#[derive(PartialEq)]
enum Benchmark {
/// Run every kernel via `run_all` (reported as Copy, Mul, Add, Triad and Dot).
All,
/// Run only the triad kernel via `run_triad`.
Triad,
/// Run only the nstream kernel via `run_nstream`.
NStream,
}
/// Compares the arrays in `vec`, and optionally the dot-product result, against reference
/// values recomputed on the host, reporting mismatches on stderr.
///
/// The reference ("gold") scalars start from `vec.init` and are advanced `numtimes` times with
/// the recurrence of the selected `benchmark`. An array is reported when its mean absolute
/// deviation from its gold value exceeds `100 * T::epsilon()`; `dot_sum` is reported when its
/// relative error against `gold_a * gold_b * vec.size` exceeds `1.0e-8`.
///
/// NOTE(review): nothing is returned, so callers cannot tell whether validation passed.
fn check_solution<T: ArrayType + Display + Sum + Into<f64>, D, A: AllocatorType>(
benchmark: Benchmark, numtimes: usize, vec: &StreamData<T, D, A>, dot_sum: Option<T>,
) {
let (mut gold_a, mut gold_b, mut gold_c) = vec.init;
// Advance the scalar model once per benchmark iteration.
for _ in 0..numtimes {
match benchmark {
Benchmark::All => {
gold_c = gold_a;
gold_b = vec.scalar * gold_c;
gold_c = gold_a + gold_b;
gold_a = gold_b + vec.scalar * gold_c;
}
Benchmark::Triad => {
gold_a = gold_b + vec.scalar * gold_c;
}
Benchmark::NStream => {
gold_a += gold_b + vec.scalar * gold_c;
}
};
}
let tolerance = T::epsilon().into() * 100.0f64;
// Mean absolute deviation of `xs` from the expected value `from`, compared to `tolerance`.
let validate_xs = |name: &str, xs: &Vec<T, A>, from: T| {
let error = (xs.iter().map(|x| abs(*x - from)).sum::<T>()).into() / xs.len() as f64;
if error > tolerance {
eprintln!("Validation failed on {}[]. Average error {} ", name, error)
}
};
validate_xs("a", &vec.a, gold_a);
validate_xs("b", &vec.b, gold_b);
validate_xs("c", &vec.c, gold_c);
// The dot product is only checked when the caller supplies a sum.
if let Some(sum) = dot_sum {
let gold_sum = (gold_a * gold_b).into() * vec.size as f64;
let error = abs((sum.into() - gold_sum) / gold_sum);
if error > 1.0e-8 {
eprintln!(
"Validation failed on sum. Error {} \nSum was {} but should be {}",
error, sum, gold_sum
);
}
}
}
/// Runs the selected benchmark on `stream` and prints the timing results.
///
/// `option` selects the benchmark (`triad_only` / `nstream_only`), the output format (`csv`)
/// and the bandwidth units (`mibibytes`). `init_arrays` is called on `stream` before the run
/// and `clean_up` after it.
///
/// The full suite and nstream are checked with `check_solution` and printed as one table row
/// per kernel. The triad-only branch prints runtime and bandwidth and does not call
/// `check_solution`.
///
/// Panics if both `triad_only` and `nstream_only` are set.
fn run_cpu<T: ArrayType + Sync + Send + Sum + Into<f64> + Display, D, A: AllocatorType>(
option: &Options, mut stream: StreamData<T, D, A>,
) where
StreamData<T, D, A>: RustStream<T>,
{
let benchmark = match (option.nstream_only, option.triad_only) {
(true, false) => Benchmark::NStream,
(false, true) => Benchmark::Triad,
(false, false) => Benchmark::All,
(true, true) => {
panic!("Both triad and nstream are enabled, pick one or omit both to run all benchmarks")
}
};
let array_bytes = option.arraysize * size_of::<T>();
let total_bytes = array_bytes * 3;
let (mega_scale, mega_suffix, giga_scale, giga_suffix) = if !option.mibibytes {
(1.0e-6, "MB", 1.0e-9, "GB")
} else {
(2f64.powi(-20), "MiB", 2f64.powi(-30), "GiB")
};
if !option.csv {
println!(
"Running {} {} times",
match benchmark {
Benchmark::All => "kernels",
Benchmark::Triad => "triad",
Benchmark::NStream => "nstream",
},
option.numtimes
);
if benchmark == Benchmark::Triad {
println!("Number of elements: {}", option.arraysize);
}
println!("Precision: {}", if option.float { "float" } else { "double" });
println!(
"Array size: {:.1} {} (={:.1} {})",
mega_scale * array_bytes as f64,
mega_suffix,
giga_scale * array_bytes as f64,
giga_suffix
);
println!(
"Total size: {:.1} {} (={:.1} {})",
mega_scale * total_bytes as f64,
mega_suffix,
giga_scale * total_bytes as f64,
giga_suffix
);
}
stream.init_arrays();
// Turns the timing samples of one kernel into (column name, value) pairs.
// `t_size` is the number of bytes the kernel touches per iteration.
let tabulate = |xs: &Vec<Duration>, name: &str, t_size: usize| -> Vec<(&str, String)> {
let tail = &xs[1..]; // tail only
// do stats
let max = tail.iter().max().map(|d| d.as_secs_f64());
let min = tail.iter().min().map(|d| d.as_secs_f64());
match (min, max) {
(Some(min), Some(max)) => {
let avg: f64 = tail.iter().map(|d| d.as_secs_f64()).sum::<f64>() / tail.len() as f64;
// Peak bandwidth: bytes moved divided by the fastest run.
let mbps = mega_scale * (t_size as f64) / min;
if option.csv {
vec![
("function", name.to_string()),
("num_times", option.numtimes.to_string()),
("n_elements", option.arraysize.to_string()),
("sizeof", t_size.to_string()),
(
if option.mibibytes { "max_mibytes_per_sec" } else { "max_mbytes_per_sec" },
mbps.to_string(),
),
("min_runtime", min.to_string()),
("max_runtime", max.to_string()),
("avg_runtime", avg.to_string()),
]
} else {
vec![
("Function", name.to_string()),
(if option.mibibytes { "MiBytes/sec" } else { "MBytes/sec" }, format!("{:.3}", mbps)),
("Min (sec)", format!("{:.5}", min)),
("Max", format!("{:.5}", max)),
("Average", format!("{:.5}", avg)),
]
}
}
(_, _) => panic!("No min/max element for {}(size={})", name, t_size),
}
};
// Prints the rows produced by `tabulate`; the header comes from the first row's names.
let tabulate_all = |xs: Vec<Vec<(&str, String)>>| {
match xs.as_slice() {
[head, ..] => {
if option.csv {
println!("{}", head.iter().map(|(col, _)| *col).collect::<Vec<_>>().join(","));
for kvs in xs {
println!("{}", kvs.iter().map(|(_, val)| val.clone()).collect::<Vec<_>>().join(","));
}
} else {
let mut table = Table::new(&vec!["{:<}"; head.len()].join(" "));
table.add_row(head.iter().fold(Row::new(), |row, (col, _)| row.with_cell(col)));
for kvs in xs {
table.add_row(kvs.iter().fold(Row::new(), |row, (_, val)| row.with_cell(val)));
}
print!("{}", table);
}
}
_ => panic!("Empty tabulation"),
};
};
match benchmark {
Benchmark::All => {
let (results, sum) = stream.run_all(option.numtimes);
check_solution(benchmark, option.numtimes, &stream, Some(sum));
tabulate_all(vec![
tabulate(&results.copy, "Copy", 2 * array_bytes),
tabulate(&results.mul, "Mul", 2 * array_bytes),
tabulate(&results.add, "Add", 3 * array_bytes),
tabulate(&results.triad, "Triad", 3 * array_bytes),
tabulate(&results.dot, "Dot", 2 * array_bytes),
])
}
Benchmark::NStream => {
let results = stream.run_nstream(option.numtimes);
check_solution(benchmark, option.numtimes, &stream, None);
tabulate_all(vec![tabulate(&results, "Nstream", 4 * array_bytes)]);
}
Benchmark::Triad => {
// NOTE(review): this branch prints results without validating them.
let results = stream.run_triad(option.numtimes);
let total_bytes = 3 * array_bytes * option.numtimes;
let bandwidth = giga_scale * (total_bytes as f64 / results.as_secs_f64());
println!("Runtime (seconds): {:.5}", results.as_secs_f64());
println!("Bandwidth ({}/s): {:.3} ", giga_suffix, bandwidth);
}
};
&stream.clean_up();
}
/// Crate version captured at compile time; `None` when `CARGO_PKG_VERSION` is not set.
const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION");
// Initial array values and the kernel scalar, defined once in single precision.
static START_A: f32 = 0.1;
static START_B: f32 = 0.2;
static START_C: f32 = 0.0;
static START_SCALAR: f32 = 0.4;
/// Scalar multiplier for `f32` runs.
static FLOAT_INIT_SCALAR: f32 = START_SCALAR;
/// Initial `(a, b, c)` values for `f32` runs.
static FLOAT_INIT: (f32, f32, f32) = (START_A, START_B, START_C);
// The double-precision values are widened from the f32 statics, so they carry the f32
// rounding of 0.1/0.2/0.4 rather than the nearest f64 to those decimals.
/// Scalar multiplier for `f64` runs.
static DOUBLE_START_SCALAR: f64 = START_SCALAR as f64;
/// Initial `(a, b, c)` values for `f64` runs.
static DOUBLE_INIT: (f64, f64, f64) = (START_A as f64, START_B as f64, START_C as f64);
fn main() {
let options: Options = Options::from_args();
if options.numtimes < 2 {
panic!("numtimes must be >= 2")
}
let alloc = System;
let alloc_name = if options.malloc { "libc-malloc" } else { "rust-system" };
let rayon_device = &|| {
let dev = RayonDevice { pool: rayon::ThreadPoolBuilder::default().build().unwrap() };
if !options.csv {
println!("Using {} thread(s), alloc={}", dev.pool.current_num_threads(), alloc_name);
if options.pin {
colour::e_yellow_ln!("Pinning threads have no effect on Rayon!")
}
}
if options.float {
run_cpu(
&options,
StreamData::new_in(
options.arraysize,
FLOAT_INIT_SCALAR,
FLOAT_INIT,
dev,
alloc,
options.malloc,
options.init,
),
);
} else {
run_cpu(
&options,
StreamData::new_in(
options.arraysize,
DOUBLE_START_SCALAR,
DOUBLE_INIT,
dev,
alloc,
options.malloc,
options.init,
),
);
}
};
let crossbeam_device = &|| {
let ncores = num_cpus::get();
let dev = ThreadedDevice::new(ncores, options.pin);
if !options.csv {
println!("Using {} thread(s), pin={}, alloc={}", ncores, options.pin, alloc_name)
}
if options.float {
run_cpu(
&options,
StreamData::new_in(
options.arraysize,
FLOAT_INIT_SCALAR,
FLOAT_INIT,
dev,
alloc,
options.malloc,
options.init,
),
);
} else {
run_cpu(
&options,
StreamData::new_in(
options.arraysize,
DOUBLE_START_SCALAR,
DOUBLE_INIT,
dev,
alloc,
options.malloc,
options.init,
),
);
}
};
let st_device = &|| {
let dev = SerialDevice { pin: options.pin };
if !options.csv {
println!("Using 1 thread, pin={}, alloc={}", options.pin, alloc_name);
}
if options.float {
run_cpu(
&options,
StreamData::new_in(
options.arraysize,
FLOAT_INIT_SCALAR,
FLOAT_INIT,
dev,
alloc,
options.malloc,
options.init,
),
);
} else {
run_cpu(
&options,
StreamData::new_in(
options.arraysize,
DOUBLE_START_SCALAR,
DOUBLE_INIT,
dev,
alloc,
options.malloc,
options.init,
),
);
}
};
let devices: Vec<(String, &'_ dyn Fn())> = vec![
("CPU (Rayon)".to_string(), rayon_device),
(format!("CPU (Crossbeam, pinning={})", options.pin), crossbeam_device),
("CPU (Single threaded)".to_string(), st_device),
];
if options.list {
devices.iter().enumerate().for_each(|(i, (name, _))| {
println!("[{}] {}", i, name);
})
} else {
match devices.get(options.device) {
Some((name, run)) => {
if !&options.csv {
println!(
"BabelStream\n\
Version: {}\n\
Implementation: Rust; {}",
VERSION.unwrap_or("unknown"),
name
);
if options.init {
println!("Initialising arrays on main thread");
}
}
run();
}
None => eprintln!("Device index {} not available", options.device),
}
if !rust_stream::run(&std::env::args().collect::<Vec<_>>()) {
std::process::exit(1);
}
}

View File

@ -47,7 +47,7 @@ impl<T: ArrayType, A: AllocatorType> RustStream<T> for StreamData<T, SerialDevic
/// Nstream kernel: `a[i] += b[i] + scalar * c[i]` for every index below `self.size`.
///
/// Each element is updated exactly once per call. The stale `b[i] * scalar * c[i]` update
/// is removed: it used the wrong operator and would have modified `a` a second time,
/// diverging from the `gold_a += gold_b + scalar * gold_c` reference used for validation.
fn nstream(&mut self) {
    for i in 0..self.size {
        self.a[i] += self.b[i] + self.scalar * self.c[i];
    }
}

View File

@ -0,0 +1,17 @@
use rstest::rstest;
// Runs the benchmark driver in-process once for every combination of device index and
// optional flag, and asserts that each run reports success. An empty string stands for
// "flag not passed" and is left out of the argument vector.
#[rstest]
fn test_main(
    #[values(0, 1, 2)] device: usize, //
    #[values("", "--pin")] pin: &str, //
    #[values("", "--malloc")] malloc: &str, //
    #[values("", "--init")] init: &str, //
    #[values("", "--triad-only", "--nstream-only")] option: &str, //
) {
    let device_index = device.to_string();
    let candidates = [
        "rust-stream",
        "--arraysize",
        "2048",
        "--device",
        device_index.as_str(),
        pin,
        malloc,
        init,
        option,
    ];
    let args: Vec<String> = candidates
        .iter()
        .filter(|arg| !arg.is_empty())
        .map(|arg| (*arg).to_owned())
        .collect();
    assert!(rust_stream::run(&args));
}