diff --git a/README.md b/README.md index 25ba9ae..cd8ab86 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,14 @@ Pass in extra flags via the `EXTRA_FLAGS` option. The binaries are named in the form `-stream`. +This project also contains implementations in alternative languages with different build systems: +* Rust - Install the [Rust toolchain](https://www.rust-lang.org/tools/install) and proceed to compile a binary via: + ```shell + > cd rust-stream/ + > cargo build --release + > ./target/release/rust-stream + ``` + Building Kokkos --------------- diff --git a/rust-stream/.gitignore b/rust-stream/.gitignore new file mode 100644 index 0000000..3a8cabc --- /dev/null +++ b/rust-stream/.gitignore @@ -0,0 +1,2 @@ +/target +.idea diff --git a/rust-stream/Cargo.lock b/rust-stream/Cargo.lock new file mode 100644 index 0000000..66addaa --- /dev/null +++ b/rust-stream/Cargo.lock @@ -0,0 +1,357 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "ansi_term" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +dependencies = [ + "winapi", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + +[[package]] +name = "bitflags" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "2.33.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2584f639eb95fea8c798496315b297cf81b9b58b6d30ab066a75455333cf4b12" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "lazy_static", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7e9d99fa91428effe99c5c6d4634cdeba32b8cf784fc428a2a687f61a952c49" +dependencies = [ + "autocfg", + "cfg-if", + "lazy_static", +] + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "heck" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "hermit-abi" +version = 
"0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c" +dependencies = [ + "libc", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8916b1f6ca17130ec6568feccee27c156ad12037880833a3b842a823236502e7" + +[[package]] +name = "memoffset" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "157b4208e3059a8f9e78d559edc658e13df41410cb3ae03979c83130067fdd87" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.24" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b0d8e0819fadc20c74ea8373106ead0600e3a67ef1fe8da56e39b9ae7275674" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + +[[package]] +name = "rust-stream" +version = "3.4.0" +dependencies = [ + "num-traits", + "rayon", + "structopt", + "tabular", +] + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "structopt" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5277acd7ee46e63e5168a80734c9f6ee81b1367a7d8772a2d765df2a3705d28c" +dependencies = [ + "clap", + "lazy_static", + "structopt-derive", +] + +[[package]] +name = "structopt-derive" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5ba9cdfda491b814720b6b06e0cac513d922fc407582032e8706e9f137976f90" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "syn" +version = "1.0.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fd9d1e9976102a03c542daa2eff1b43f9d72306342f3f8b3ed5fb8908195d6f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "tabular" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7e35bee02dcefe64a74065b6b869d241eab1a02fea0d65e6074ce4e51894c3b" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "unicode-segmentation" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796" + +[[package]] +name = "unicode-width" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" + +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" + +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "version_check" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" + +[[package]] +name = "winapi" +version = "0.3.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/rust-stream/Cargo.toml b/rust-stream/Cargo.toml new file mode 100644 index 0000000..35a2880 --- /dev/null +++ b/rust-stream/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "rust-stream" +version = "3.4.0" +authors = ["Wei-Chen Lin "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +num-traits = "0.2.14" +structopt = "0.3.13" +tabular = "0.1.4" +rayon = "1.5" diff --git a/rust-stream/src/main.rs b/rust-stream/src/main.rs new file mode 100644 index 0000000..3e31fd9 --- /dev/null +++ b/rust-stream/src/main.rs @@ -0,0 +1,413 @@ +use std::fmt::{Debug, Display}; +use std::iter::Sum; +use std::mem::size_of; +use std::time::{Duration, Instant}; + +use num_traits::{abs, NumAssign, Signed}; +use num_traits::real::Real; +use rayon::prelude::*; +use structopt::StructOpt; +use tabular::{Row, Table}; + +#[derive(Debug, StructOpt)] +struct Options { + /// List available devices + #[structopt(long)] list: bool, + /// Select device at + #[structopt(long, default_value = "0")] device: usize, + /// Run the test times (NUM >= 2) + #[structopt(long, default_value = "100")] numtimes: usize, + /// Use elements in the array + #[structopt(long, default_value = "33554432")] arraysize: usize, + /// Use floats (rather than doubles) + 
#[structopt(long)] float: bool, + /// Only run triad + #[structopt(long)] triad_only: bool, + /// Only run nstream + #[structopt(long)] nstream_only: bool, + /// Output as csv table + #[structopt(long)] csv: bool, + /// Use MiB=2^20 for bandwidth calculation (default MB=10^6) + #[structopt(long)] mibibytes: bool, +} + +#[derive(PartialEq)] +enum Benchmark { All, Triad, NStream } + +struct StreamData { + size: usize, + scalar: T, + a: Vec, + b: Vec, + c: Vec, +} + +impl StreamData { + pub fn new(size: usize, scalar: T) -> StreamData { + StreamData { + size, + scalar, + a: vec![T::default(); size], + b: vec![T::default(); size], + c: vec![T::default(); size], + } + } +} + +struct PlainFor; + +struct RayonPar; + +#[inline(always)] +fn timed(f: F) -> Duration { + let start = Instant::now(); + f(); + start.elapsed() +} + +#[inline(always)] +fn timed_mut T>(f: &mut F) -> (Duration, T) { + let start = Instant::now(); + let x = f(); + (start.elapsed(), x) +} + +struct AllTiming { copy: T, mul: T, add: T, triad: T, dot: T } + +trait RustStream { + fn init_arrays(&mut self, init: (T, T, T)); + fn copy(&mut self); + fn mul(&mut self); + fn add(&mut self); + fn triad(&mut self); + fn nstream(&mut self); + fn dot(&mut self) -> T; + + fn run_all(&mut self, n: usize) -> (AllTiming>, T) { + let mut timings: AllTiming> = AllTiming { + copy: vec![Duration::default(); n], + mul: vec![Duration::default(); n], + add: vec![Duration::default(); n], + triad: vec![Duration::default(); n], + dot: vec![Duration::default(); n], + }; + let mut last_sum = T::default(); + for i in 0..n { + timings.copy[i] = timed(|| self.copy()); + timings.mul[i] = timed(|| self.mul()); + timings.add[i] = timed(|| self.add()); + timings.triad[i] = timed(|| self.triad()); + let (dot, sum) = timed_mut(&mut || self.dot()); + timings.dot[i] = dot; + last_sum = sum; + } + (timings, last_sum) + } + + fn run_triad(&mut self, n: usize) -> Duration { + timed(|| for _ in 0..n { self.triad(); }) + } + + fn run_nstream(&mut 
self, n: usize) -> Vec { + (0..n).map(|_| timed(|| self.nstream())).collect::>() + } +} + +trait ArrayType: Real + NumAssign + Signed + Default {} + +impl ArrayType for T {} + +// single threaded version +impl RustStream for StreamData { + fn init_arrays(&mut self, init: (T, T, T)) { + self.a.fill(init.0); + self.b.fill(init.1); + self.c.fill(init.2); + } + + fn copy(&mut self) { + for i in 0..self.size { + self.c[i] = self.a[i]; + } + } + + fn mul(&mut self) { + for i in 0..self.size { + self.b[i] = self.scalar * self.c[i]; + } + } + + fn add(&mut self) { + for i in 0..self.size { + self.c[i] = self.a[i] + self.b[i]; + } + } + + fn triad(&mut self) { + for i in 0..self.size { + self.a[i] = self.b[i] + self.scalar * self.c[i]; + } + } + + fn nstream(&mut self) { + // nstream kernel: a[i] += b[i] + scalar * c[i]; must agree with the Rayon impl and the gold values computed in validate() + for i in 0..self.size { + self.a[i] += self.b[i] + self.scalar * self.c[i]; + } + } + + fn dot(&mut self) -> T { + let mut sum: T = T::default(); + for i in 0..self.size { + sum += self.a[i] * self.b[i]; + } + sum + } +} + +// Rayon version, it should be semantically equal to the single threaded version +impl RustStream for StreamData { + fn init_arrays(&mut self, init: (T, T, T)) { + self.a.fill(init.0); + self.b.fill(init.1); + self.c.fill(init.2); + } + + fn copy(&mut self) { + let a = &self.a; + self.c.par_iter_mut().enumerate().for_each(|(i, c)| *c = a[i]) + } + + fn mul(&mut self) { + let c = &self.c; + let scalar = &self.scalar; + self.b.par_iter_mut().enumerate().for_each(|(i, b)| *b = *scalar * c[i]) + } + + fn add(&mut self) { + let a = &self.a; + let b = &self.b; + self.c.par_iter_mut().enumerate().for_each(|(i, c)| *c = a[i] + b[i]) + } + + fn triad(&mut self) { + let scalar = &self.scalar; + let b = &self.b; + let c = &self.c; + self.a.par_iter_mut().enumerate().for_each(|(i, a)| *a = b[i] + *scalar * c[i]) + } + + fn nstream(&mut self) { + let scalar = &self.scalar; + let b = &self.b; + let c = &self.c; + self.a.par_iter_mut().enumerate().for_each(|(i, a)| *a += b[i] + *scalar * c[i]) + } + 
+ fn dot(&mut self) -> T { + let a = &self.a; + let b = &self.b; + (0..self.size).into_par_iter().fold(|| T::default(), |acc, i| acc + a[i] * b[i]).sum::() + } +} + +fn validate>( + benchmark: Benchmark, + numtimes: usize, + vec: &StreamData, + dot_sum: Option, + scalar: T, init: (T, T, T)) { + let (mut gold_a, mut gold_b, mut gold_c) = init; + for _ in 0..numtimes { + match benchmark { + Benchmark::All => { + gold_c = gold_a; + gold_b = scalar * gold_c; + gold_c = gold_a + gold_b; + gold_a = gold_b + scalar * gold_c; + } + Benchmark::Triad => { + gold_a = gold_b + scalar * gold_c; + } + Benchmark::NStream => { + gold_a += gold_b + scalar * gold_c; + } + }; + } + let tolerance = T::epsilon().into() * 100.0f64; + let validate_xs = |name: &str, xs: &Vec, from: T| { + let error = (xs.iter().map(|x| abs(*x - from)).sum::()).into() / xs.len() as f64; + if error > tolerance { + eprintln!("Validation failed on {}[]. Average error {} ", name, error) + } + }; + validate_xs("a", &vec.a, gold_a); + validate_xs("b", &vec.b, gold_b); + validate_xs("c", &vec.c, gold_c); + + if let Some(sum) = dot_sum { + let gold_sum = (gold_a * gold_b).into() * vec.size as f64; + let error = abs((sum.into() - gold_sum) / gold_sum); + if error > 1.0e-8 { + eprintln!("Validation failed on sum. 
Error {} \nSum was {} but should be {}", error, sum, gold_sum); + } + } +} + +fn run_cpu + Display>(option: Options, scalar: T, init: (T, T, T)) { + let benchmark = match (option.nstream_only, option.triad_only) { + (true, false) => Benchmark::NStream, + (false, true) => Benchmark::Triad, + (false, false) => Benchmark::All, + (true, true) => panic!("Both triad and nstream are enabled, pick one or omit both to run all benchmarks"), + }; + + let array_bytes = option.arraysize * size_of::(); + let total_bytes = array_bytes * 3; + let (mega_scale, mega_suffix, giga_scale, giga_suffix) = + if !option.mibibytes { (1.0e-6, "MB", 1.0e-9, "GB") } else { (2f64.powi(-20), "MiB", 2f64.powi(-30), "GiB") }; + + if !option.csv { + println!("Running {} {} times", match benchmark { + Benchmark::All => "kernels", + Benchmark::Triad => "triad", + Benchmark::NStream => "nstream", + }, option.numtimes); + + if benchmark == Benchmark::Triad { + println!("Number of elements: {}", option.arraysize); + } + + println!("Precision: {}", if option.float { "float" } else { "double" }); + println!("Array size: {:.1} {}(={:.1} {})", + mega_scale * array_bytes as f64, mega_suffix, giga_scale * array_bytes as f64, giga_suffix); + println!("Total size: {:.1} {}(={:.1} {})", + mega_scale * total_bytes as f64, mega_suffix, giga_scale * total_bytes as f64, giga_suffix); + } + + + let mut vec: StreamData = StreamData::::new(option.arraysize, scalar); + let stream = &mut vec as &mut dyn RustStream; + stream.init_arrays(init); + + let tabulate = |xs: &Vec, name: &str, t_size: usize| -> Vec<(&str, String)> { + let tail = &xs[1..]; // tail only + // do stats + let max = tail.iter().max().map(|d| d.as_secs_f64()); + let min = tail.iter().min().map(|d| d.as_secs_f64()); + match (min, max) { + (Some(min), Some(max)) => { + let avg: f64 = tail.iter().map(|d| d.as_secs_f64()).sum::() / tail.len() as f64; + let mbps = mega_scale * (t_size as f64) / min; + if option.csv { + vec![ + ("function", name.to_string()), 
+ ("num_times", option.numtimes.to_string()), + ("n_elements", option.arraysize.to_string()), + ("sizeof", t_size.to_string()), + (if option.mibibytes { "max_mibytes_per_sec" } else { "max_mbytes_per_sec" }, mbps.to_string()), + ("min_runtime", min.to_string()), + ("max_runtime", max.to_string()), + ("avg_runtime", avg.to_string()), + ] + } else { + vec![ + ("Function", name.to_string()), + (if option.mibibytes { "MiBytes/sec" } else { "MBytes/sec" }, format!("{:.3}", mbps)), + ("Min (sec)", format!("{:.5}", min)), + ("Max", format!("{:.5}", max)), + ("Average", format!("{:.5}", avg)), + ] + } + } + (_, _) => panic!("No min/max element for {}(size={})", name, t_size) + } + }; + + let tabulate_all = |xs: Vec>| { + match xs.as_slice() { + [head, .. ] => { + if option.csv { + println!("{}", head.iter().map(|(col, _)| *col).collect::>().join(",")); + for kvs in xs { + println!("{}", kvs.iter().map(|(_, val)| val.clone()).collect::>().join(",")); + } + } else { + let mut table = Table::new(&vec!["{:<}"; head.len()].join(" ")); + table.add_row(head.iter().fold(Row::new(), |row, (col, _)| row.with_cell(col))); + for kvs in xs { + table.add_row(kvs.iter().fold(Row::new(), |row, (_, val)| row.with_cell(val))); + } + println!("{}", table); + } + } + _ => panic!("Empty tabulation") + }; + }; + + match benchmark { + Benchmark::All => { + let (results, sum) = stream.run_all(option.numtimes); + validate(benchmark, option.numtimes, &vec, Some(sum), scalar, init); + tabulate_all(vec![ + tabulate(&results.copy, "Copy", 2 * array_bytes), + tabulate(&results.mul, "Mul", 2 * array_bytes), + tabulate(&results.add, "Add", 3 * array_bytes), + tabulate(&results.triad, "Triad", 3 * array_bytes), + tabulate(&results.dot, "Dot", 2 * array_bytes), + ]) + } + Benchmark::NStream => { + let results = stream.run_nstream(option.numtimes); + validate(benchmark, option.numtimes, &vec, None, scalar, init); + tabulate_all(vec![ + tabulate(&results, "Nstream", 4 * array_bytes) + ]); + } + 
Benchmark::Triad => { + let results = stream.run_triad(option.numtimes); + let total_bytes = 3 * array_bytes * option.numtimes; + let bandwidth = mega_scale * (total_bytes as f64 / results.as_secs_f64()); + + println!("Runtime (seconds): {:.5}", results.as_secs_f64()); + println!("Bandwidth ({}/s): {:.3} ", giga_suffix, bandwidth); + } + }; +} + +const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION"); + +static START_A: f32 = 0.1; +static START_B: f32 = 0.2; +static START_C: f32 = 0.0; +static START_SCALAR: f32 = 0.4; + +fn main() { + let options: Options = Options::from_args(); + + // only CPU via Rayon for now + let devices = vec![("CPU (Rayon)", |opt: Options| { + if opt.float { + run_cpu::(opt, START_SCALAR, (START_A, START_B, START_C)); + } else { + run_cpu::(opt, START_SCALAR.into(), (START_A.into(), START_B.into(), START_C.into())); + } + })]; + + if options.list { + devices.iter().enumerate().for_each(|(i, (name, _))| { + println!("{}: {}", i, name); + }) + } else { + match devices.get(options.device) { + Some((_, run)) => { + if !&options.csv { + println!("BabelStream\n\ + Version: {}\n\ + Implementation: Rust+Rayon", VERSION.unwrap_or("unknown")) + } + run(options); + } + None => eprintln!("Device index({}) not available", options.device) + } + } +}