commit
9ec3018b7f
16
.github/workflows/main.yaml
vendored
16
.github/workflows/main.yaml
vendored
@ -11,6 +11,22 @@ on:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
|
||||||
|
test-rust:
|
||||||
|
runs-on: ubuntu-18.04
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
working-directory: ./src/rust/rust-stream
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: Setup project
|
||||||
|
run: rustup install nightly
|
||||||
|
- name: Compile project
|
||||||
|
run: cargo +nightly build --release
|
||||||
|
- name: Test project
|
||||||
|
run: cargo +nightly test --release
|
||||||
|
- name: Test run project
|
||||||
|
run: ./target/release/rust-stream --arraysize 2048
|
||||||
|
|
||||||
test-java:
|
test-java:
|
||||||
runs-on: ubuntu-18.04
|
runs-on: ubuntu-18.04
|
||||||
defaults:
|
defaults:
|
||||||
|
|||||||
@ -41,11 +41,11 @@ BabelStream is currently implemented in the following parallel programming model
|
|||||||
- TBB
|
- TBB
|
||||||
- Thrust (via CUDA or HIP)
|
- Thrust (via CUDA or HIP)
|
||||||
|
|
||||||
|
|
||||||
This project also contains implementations in alternative languages with different build systems:
|
This project also contains implementations in alternative languages with different build systems:
|
||||||
* Julia - [JuliaStream.jl](./src/julia/JuliaStream.jl)
|
* Julia - [JuliaStream.jl](./src/julia/JuliaStream.jl)
|
||||||
* Java - [java-stream](./src/java/java-stream)
|
* Java - [java-stream](./src/java/java-stream)
|
||||||
* Scala - [scala-stream](./src/scala/scala-stream)
|
* Scala - [scala-stream](./src/scala/scala-stream)
|
||||||
|
* Rust - [rust-stream](./src/rust/rust-stream)
|
||||||
|
|
||||||
## How is this different to STREAM?
|
## How is this different to STREAM?
|
||||||
|
|
||||||
|
|||||||
2
src/rust/rust-stream/.cargo/config.toml
Normal file
2
src/rust/rust-stream/.cargo/config.toml
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
[build]
|
||||||
|
rustflags = ["-C", "target-cpu=native"]
|
||||||
2
src/rust/rust-stream/.gitignore
vendored
Normal file
2
src/rust/rust-stream/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
/target
|
||||||
|
.idea
|
||||||
636
src/rust/rust-stream/Cargo.lock
generated
Normal file
636
src/rust/rust-stream/Cargo.lock
generated
Normal file
@ -0,0 +1,636 @@
|
|||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ansi_term"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
|
||||||
|
dependencies = [
|
||||||
|
"winapi 0.3.9",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "atty"
|
||||||
|
version = "0.2.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||||
|
dependencies = [
|
||||||
|
"hermit-abi",
|
||||||
|
"libc",
|
||||||
|
"winapi 0.3.9",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "autocfg"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bitflags"
|
||||||
|
version = "1.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cfg-if"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap"
|
||||||
|
version = "2.33.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
|
||||||
|
dependencies = [
|
||||||
|
"ansi_term",
|
||||||
|
"atty",
|
||||||
|
"bitflags",
|
||||||
|
"strsim",
|
||||||
|
"textwrap",
|
||||||
|
"unicode-width",
|
||||||
|
"vec_map",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "colour"
|
||||||
|
version = "0.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a27e4532f26f510c24bb8477d963c0c3ef27e293c3b2c507cccb0536d493201a"
|
||||||
|
dependencies = [
|
||||||
|
"crossterm",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "core_affinity"
|
||||||
|
version = "0.5.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7f8a03115cc34fb0d7c321dd154a3914b3ca082ccc5c11d91bf7117dbbe7171f"
|
||||||
|
dependencies = [
|
||||||
|
"kernel32-sys",
|
||||||
|
"libc",
|
||||||
|
"num_cpus",
|
||||||
|
"winapi 0.2.8",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam"
|
||||||
|
version = "0.8.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4ae5588f6b3c3cb05239e90bd110f257254aecd01e4635400391aeae07497845"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"crossbeam-channel",
|
||||||
|
"crossbeam-deque",
|
||||||
|
"crossbeam-epoch",
|
||||||
|
"crossbeam-queue",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-channel"
|
||||||
|
version = "0.5.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-deque"
|
||||||
|
version = "0.8.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"crossbeam-epoch",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-epoch"
|
||||||
|
version = "0.9.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"crossbeam-utils",
|
||||||
|
"lazy_static",
|
||||||
|
"memoffset",
|
||||||
|
"scopeguard",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-queue"
|
||||||
|
version = "0.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9b10ddc024425c88c2ad148c1b0fd53f4c6d38db9697c9f1588381212fa657c9"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-utils"
|
||||||
|
version = "0.8.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"lazy_static",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossterm"
|
||||||
|
version = "0.19.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7c36c10130df424b2f3552fcc2ddcd9b28a27b1e54b358b45874f88d1ca6888c"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"crossterm_winapi",
|
||||||
|
"lazy_static",
|
||||||
|
"libc",
|
||||||
|
"mio",
|
||||||
|
"parking_lot",
|
||||||
|
"signal-hook",
|
||||||
|
"winapi 0.3.9",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossterm_winapi"
|
||||||
|
version = "0.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0da8964ace4d3e4a044fd027919b2237000b24315a37c916f61809f1ff2140b9"
|
||||||
|
dependencies = [
|
||||||
|
"winapi 0.3.9",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "either"
|
||||||
|
version = "1.6.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "heck"
|
||||||
|
version = "0.3.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-segmentation",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hermit-abi"
|
||||||
|
version = "0.1.19"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "instant"
|
||||||
|
version = "0.1.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "kernel32-sys"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
|
||||||
|
dependencies = [
|
||||||
|
"winapi 0.2.8",
|
||||||
|
"winapi-build",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lazy_static"
|
||||||
|
version = "1.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libc"
|
||||||
|
version = "0.2.108"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8521a1b57e76b1ec69af7599e75e38e7b7fad6610f037db8c79b127201b5d119"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lock_api"
|
||||||
|
version = "0.4.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "712a4d093c9976e24e7dbca41db895dabcbac38eb5f4045393d17a95bdfb1109"
|
||||||
|
dependencies = [
|
||||||
|
"scopeguard",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "log"
|
||||||
|
version = "0.4.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memoffset"
|
||||||
|
version = "0.6.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "59accc507f1338036a0477ef61afdae33cde60840f4dfe481319ce3ad116ddf9"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mio"
|
||||||
|
version = "0.7.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8067b404fe97c70829f082dec8bcf4f71225d7eaea1d8645349cb76fa06205cc"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"log",
|
||||||
|
"miow",
|
||||||
|
"ntapi",
|
||||||
|
"winapi 0.3.9",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "miow"
|
||||||
|
version = "0.3.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b9f1c5b025cda876f66ef43a113f91ebc9f4ccef34843000e0adf6ebbab84e21"
|
||||||
|
dependencies = [
|
||||||
|
"winapi 0.3.9",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ntapi"
|
||||||
|
version = "0.3.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44"
|
||||||
|
dependencies = [
|
||||||
|
"winapi 0.3.9",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num-traits"
|
||||||
|
version = "0.2.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num_cpus"
|
||||||
|
version = "1.13.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3"
|
||||||
|
dependencies = [
|
||||||
|
"hermit-abi",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "parking_lot"
|
||||||
|
version = "0.11.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99"
|
||||||
|
dependencies = [
|
||||||
|
"instant",
|
||||||
|
"lock_api",
|
||||||
|
"parking_lot_core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "parking_lot_core"
|
||||||
|
version = "0.8.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"instant",
|
||||||
|
"libc",
|
||||||
|
"redox_syscall",
|
||||||
|
"smallvec",
|
||||||
|
"winapi 0.3.9",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pest"
|
||||||
|
version = "2.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53"
|
||||||
|
dependencies = [
|
||||||
|
"ucd-trie",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro-error"
|
||||||
|
version = "1.0.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro-error-attr",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
"version_check",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro-error-attr"
|
||||||
|
version = "1.0.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"version_check",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.32"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ba508cc11742c0dc5c1659771673afbab7a0efab23aa17e854cbab0837ed0b43"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-xid",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rayon"
|
||||||
|
version = "1.5.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
"crossbeam-deque",
|
||||||
|
"either",
|
||||||
|
"rayon-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rayon-core"
|
||||||
|
version = "1.9.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-channel",
|
||||||
|
"crossbeam-deque",
|
||||||
|
"crossbeam-utils",
|
||||||
|
"lazy_static",
|
||||||
|
"num_cpus",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "redox_syscall"
|
||||||
|
version = "0.2.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rstest"
|
||||||
|
version = "0.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "041bb0202c14f6a158bbbf086afb03d0c6e975c2dec7d4912f8061ed44f290af"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"rustc_version",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rust-stream"
|
||||||
|
version = "3.4.0"
|
||||||
|
dependencies = [
|
||||||
|
"colour",
|
||||||
|
"core_affinity",
|
||||||
|
"crossbeam",
|
||||||
|
"libc",
|
||||||
|
"num-traits",
|
||||||
|
"num_cpus",
|
||||||
|
"rayon",
|
||||||
|
"rstest",
|
||||||
|
"rustversion",
|
||||||
|
"structopt",
|
||||||
|
"tabular",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustc_version"
|
||||||
|
version = "0.3.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f0dfe2087c51c460008730de8b57e6a320782fbfb312e1f4d520e6c6fae155ee"
|
||||||
|
dependencies = [
|
||||||
|
"semver",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustversion"
|
||||||
|
version = "1.0.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "61b3909d758bb75c79f23d4736fac9433868679d3ad2ea7a61e3c25cfda9a088"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "scopeguard"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "semver"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6"
|
||||||
|
dependencies = [
|
||||||
|
"semver-parser",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "semver-parser"
|
||||||
|
version = "0.10.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7"
|
||||||
|
dependencies = [
|
||||||
|
"pest",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "signal-hook"
|
||||||
|
version = "0.1.17"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7e31d442c16f047a671b5a71e2161d6e68814012b7f5379d269ebd915fac2729"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"mio",
|
||||||
|
"signal-hook-registry",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "signal-hook-registry"
|
||||||
|
version = "1.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "smallvec"
|
||||||
|
version = "1.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "strsim"
|
||||||
|
version = "0.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "structopt"
|
||||||
|
version = "0.3.25"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "40b9788f4202aa75c240ecc9c15c65185e6a39ccdeb0fd5d008b98825464c87c"
|
||||||
|
dependencies = [
|
||||||
|
"clap",
|
||||||
|
"lazy_static",
|
||||||
|
"structopt-derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "structopt-derive"
|
||||||
|
version = "0.4.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0"
|
||||||
|
dependencies = [
|
||||||
|
"heck",
|
||||||
|
"proc-macro-error",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "1.0.82"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8daf5dd0bb60cbd4137b1b587d2fc0ae729bc07cf01cd70b36a1ed5ade3b9d59"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"unicode-xid",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tabular"
|
||||||
|
version = "0.1.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e7e35bee02dcefe64a74065b6b869d241eab1a02fea0d65e6074ce4e51894c3b"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-width",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "textwrap"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-width",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ucd-trie"
|
||||||
|
version = "0.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-segmentation"
|
||||||
|
version = "1.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-width"
|
||||||
|
version = "0.1.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-xid"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "vec_map"
|
||||||
|
version = "0.8.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "version_check"
|
||||||
|
version = "0.9.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi"
|
||||||
|
version = "0.2.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi"
|
||||||
|
version = "0.3.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-i686-pc-windows-gnu",
|
||||||
|
"winapi-x86_64-pc-windows-gnu",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-build"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-i686-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-x86_64-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||||
34
src/rust/rust-stream/Cargo.toml
Normal file
34
src/rust/rust-stream/Cargo.toml
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
[package]
|
||||||
|
name = "rust-stream"
|
||||||
|
version = "3.4.0"
|
||||||
|
authors = ["Wei-Chen Lin <wl14928@bristol.ac.uk>"]
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
num-traits = "0.2.14"
|
||||||
|
structopt = "0.3.13"
|
||||||
|
tabular = "0.1.4"
|
||||||
|
rayon = "1.5.1"
|
||||||
|
crossbeam = "0.8.1"
|
||||||
|
num_cpus = "1.13.0"
|
||||||
|
rustversion = "1.0"
|
||||||
|
libc = "0.2.97"
|
||||||
|
core_affinity = "0.5.10"
|
||||||
|
colour = "0.6.0"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
rstest = "0.10.0"
|
||||||
|
|
||||||
|
[build-dependencies]
|
||||||
|
rustversion = "1.0"
|
||||||
|
|
||||||
|
[profile.dev]
|
||||||
|
opt-level = 2
|
||||||
|
overflow-checks = true
|
||||||
|
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
opt-level = 3
|
||||||
|
lto = "thin" # fully enabling this (i.e true) negatively affects performance as tested on both AMD and Intel
|
||||||
78
src/rust/rust-stream/README.md
Normal file
78
src/rust/rust-stream/README.md
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
rust-stream
|
||||||
|
===========
|
||||||
|
|
||||||
|
This is an implementation of BabelStream in Rust.
|
||||||
|
|
||||||
|
Currently, we support five CPU threading APIs as devices:
|
||||||
|
|
||||||
|
* Plain - basic single-threaded `for` version, see [plain_stream.rs](src/plain_stream.rs)
|
||||||
|
* [Rayon](https://github.com/rayon-rs/rayon) - Parallel with high level API,
|
||||||
|
see [rayon_stream.rs](src/rayon_stream.rs)
|
||||||
|
* [Crossbeam](https://github.com/crossbeam-rs/crossbeam) - Parallel with partitions per thread,
|
||||||
|
see [crossbeam_stream.rs](src/crossbeam_stream.rs)
|
||||||
|
* Arc - Parallel with `Vec` per thread (static partitions) wrapped in `Mutex` contained in `Arc`s,
|
||||||
|
see [arc_stream.rs](src/arc_stream.rs)
|
||||||
|
* Unsafe - Parallel with unsafe pointer per thread (static partitions) to `Vec`,
|
||||||
|
see [unsafe_stream.rs](src/unsafe_stream.rs)
|
||||||
|
|
||||||
|
In addition, this implementation also supports the following extra flags:
|
||||||
|
****
|
||||||
|
```
|
||||||
|
--init Initialise each benchmark array at allocation time on the main thread
|
||||||
|
--malloc Use libc malloc instead of the Rust's allocator for benchmark array allocation
|
||||||
|
--pin Pin threads to distinct cores, this has NO effect in Rayon devices
|
||||||
|
```
|
||||||
|
|
||||||
|
The maximum thread count is controlled by the environment variable `BABELSTREAM_NUM_THREADS`, which applies to all devices (avoid setting `RAYON_NUM_THREADS`; the implementation will issue a warning if it is set).
|
||||||
|
|
||||||
|
There is an ongoing investigation into potential performance issues on NUMA systems. As part of
|
||||||
|
the experiment, this implementation made use of the
|
||||||
|
provisional [Allocator traits](https://github.com/rust-lang/rust/issues/32838), which require
|
||||||
|
unstable Rust. We hope a NUMA-aware allocator will be available once the allocator API reaches
|
||||||
|
stable Rust.
|
||||||
|
|
||||||
|
### Build & Run
|
||||||
|
|
||||||
|
Prerequisites:
|
||||||
|
|
||||||
|
* [Rust toolchain](https://www.rust-lang.org/tools/install)
|
||||||
|
|
||||||
|
Once the toolchain is installed, enable the nightly channel:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
> rustup install nightly
|
||||||
|
> rustup default nightly # optional, this sets `+nightly` automatically for cargo calls later
|
||||||
|
```
|
||||||
|
|
||||||
|
With `cargo` on path, compile and run the benchmark with:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
> cd rust-stream/
|
||||||
|
> cargo +nightly build --release # or simply `cargo build --release` if nightly channel is the default
|
||||||
|
> ./target/release/rust-stream --help
|
||||||
|
rust-stream 3.4.0
|
||||||
|
|
||||||
|
USAGE:
|
||||||
|
rust-stream [FLAGS] [OPTIONS]
|
||||||
|
|
||||||
|
FLAGS:
|
||||||
|
--csv Output as csv table
|
||||||
|
--float Use floats (rather than doubles)
|
||||||
|
-h, --help Prints help information
|
||||||
|
--init Initialise each benchmark array at allocation time on the main thread
|
||||||
|
--list List available devices
|
||||||
|
--malloc Use libc malloc instead of the Rust's allocator for benchmark array allocation
|
||||||
|
--mibibytes Use MiB=2^20 for bandwidth calculation (default MB=10^6)
|
||||||
|
--nstream-only Only run nstream
|
||||||
|
--pin Pin threads to distinct cores, this has NO effect in Rayon devices
|
||||||
|
--triad-only Only run triad
|
||||||
|
-V, --version Prints version information
|
||||||
|
|
||||||
|
OPTIONS:
|
||||||
|
-s, --arraysize <arraysize> Use <arraysize> elements in the array [default: 33554432]
|
||||||
|
--device <device> Select device at <device> [default: 0]
|
||||||
|
-n, --numtimes <numtimes> Run the test <numtimes> times (NUM >= 2) [default: 100]
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
68
src/rust/rust-stream/rustfmt.toml
Normal file
68
src/rust/rust-stream/rustfmt.toml
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
max_width = 100
|
||||||
|
hard_tabs = false
|
||||||
|
tab_spaces = 2
|
||||||
|
newline_style = "Auto"
|
||||||
|
use_small_heuristics = "Max"
|
||||||
|
indent_style = "Block"
|
||||||
|
wrap_comments = false
|
||||||
|
format_code_in_doc_comments = false
|
||||||
|
comment_width = 80
|
||||||
|
normalize_comments = false
|
||||||
|
normalize_doc_attributes = false
|
||||||
|
license_template_path = ""
|
||||||
|
format_strings = false
|
||||||
|
format_macro_matchers = false
|
||||||
|
format_macro_bodies = true
|
||||||
|
empty_item_single_line = true
|
||||||
|
struct_lit_single_line = true
|
||||||
|
fn_single_line = true
|
||||||
|
where_single_line = true
|
||||||
|
imports_indent = "Block"
|
||||||
|
imports_layout = "Mixed"
|
||||||
|
imports_granularity = "Preserve"
|
||||||
|
group_imports = "Preserve"
|
||||||
|
reorder_imports = true
|
||||||
|
reorder_modules = true
|
||||||
|
reorder_impl_items = false
|
||||||
|
type_punctuation_density = "Wide"
|
||||||
|
space_before_colon = false
|
||||||
|
space_after_colon = true
|
||||||
|
spaces_around_ranges = false
|
||||||
|
binop_separator = "Front"
|
||||||
|
remove_nested_parens = true
|
||||||
|
combine_control_expr = true
|
||||||
|
overflow_delimited_expr = false
|
||||||
|
struct_field_align_threshold = 0
|
||||||
|
enum_discrim_align_threshold = 0
|
||||||
|
match_arm_blocks = true
|
||||||
|
match_arm_leading_pipes = "Never"
|
||||||
|
force_multiline_blocks = false
|
||||||
|
fn_args_layout = "Compressed"
|
||||||
|
brace_style = "PreferSameLine"
|
||||||
|
control_brace_style = "AlwaysSameLine"
|
||||||
|
trailing_semicolon = true
|
||||||
|
trailing_comma = "Vertical"
|
||||||
|
match_block_trailing_comma = false
|
||||||
|
blank_lines_upper_bound = 1
|
||||||
|
blank_lines_lower_bound = 0
|
||||||
|
edition = "2015"
|
||||||
|
version = "One"
|
||||||
|
inline_attribute_width = 0
|
||||||
|
merge_derives = true
|
||||||
|
use_try_shorthand = false
|
||||||
|
use_field_init_shorthand = false
|
||||||
|
force_explicit_abi = true
|
||||||
|
condense_wildcard_suffixes = false
|
||||||
|
color = "Auto"
|
||||||
|
required_version = "1.4.38"
|
||||||
|
unstable_features = false
|
||||||
|
disable_all_formatting = false
|
||||||
|
skip_children = false
|
||||||
|
hide_parse_errors = false
|
||||||
|
error_on_line_overflow = false
|
||||||
|
error_on_unformatted = false
|
||||||
|
report_todo = "Never"
|
||||||
|
report_fixme = "Never"
|
||||||
|
ignore = []
|
||||||
|
emit_mode = "Files"
|
||||||
|
make_backup = false
|
||||||
254
src/rust/rust-stream/src/arc_stream.rs
Normal file
254
src/rust/rust-stream/src/arc_stream.rs
Normal file
@ -0,0 +1,254 @@
|
|||||||
|
use std::iter::Sum;
|
||||||
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
|
use self::core_affinity::CoreId;
|
||||||
|
use crate::stream::{AllocatorType, ArrayType, RustStream, StreamData};
|
||||||
|
|
||||||
|
struct ArcHeapData<T: ArrayType, A: AllocatorType> {
|
||||||
|
a_chunks: Vec<Arc<Mutex<Vec<T, A>>>>,
|
||||||
|
b_chunks: Vec<Arc<Mutex<Vec<T, A>>>>,
|
||||||
|
c_chunks: Vec<Arc<Mutex<Vec<T, A>>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct ArcDevice<T: ArrayType, A: AllocatorType> {
|
||||||
|
pub(crate) ncore: usize,
|
||||||
|
pub(crate) pin: bool,
|
||||||
|
pub(crate) core_ids: Vec<CoreId>,
|
||||||
|
data: ArcHeapData<T, A>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: ArrayType, A: AllocatorType> ArcDevice<T, A> {
|
||||||
|
pub fn new(ncore: usize, pin: bool, alloc: A) -> Self {
|
||||||
|
let mut core_ids = match core_affinity::get_core_ids() {
|
||||||
|
Some(xs) => xs,
|
||||||
|
None => {
|
||||||
|
colour::e_red_ln!("Cannot enumerate cores, pinning will not work if enabled");
|
||||||
|
(0..ncore).map(|i| CoreId { id: i }).collect()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
core_ids.resize(ncore, core_ids[0]);
|
||||||
|
|
||||||
|
let lift =
|
||||||
|
|| (0..ncore).map(|_| return Arc::new(Mutex::new(Vec::new_in(alloc)))).collect::<Vec<_>>();
|
||||||
|
let data = ArcHeapData { a_chunks: lift(), b_chunks: lift(), c_chunks: lift() };
|
||||||
|
|
||||||
|
ArcDevice { ncore, pin, core_ids, data }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn ref_a(&self, t: usize) -> Arc<Mutex<Vec<T, A>>> { self.data.a_chunks[t].clone() }
|
||||||
|
|
||||||
|
pub fn ref_b(&self, t: usize) -> Arc<Mutex<Vec<T, A>>> { self.data.b_chunks[t].clone() }
|
||||||
|
|
||||||
|
pub fn ref_c(&self, t: usize) -> Arc<Mutex<Vec<T, A>>> { self.data.c_chunks[t].clone() }
|
||||||
|
|
||||||
|
// divide the length by the number of cores, the last core gets less work if it does not divide
|
||||||
|
fn chunk_size(&self, len: usize, t: usize) -> usize {
|
||||||
|
assert!(t < self.ncore);
|
||||||
|
let chunk = (len as f64 / self.ncore as f64).ceil() as usize;
|
||||||
|
if t == self.ncore - 1 {
|
||||||
|
len - (t * chunk)
|
||||||
|
} else {
|
||||||
|
chunk
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
extern crate core_affinity;
|
||||||
|
|
||||||
|
// Arc+Mutex threaded version, it should be semantically equal to the single threaded version
|
||||||
|
impl<T: 'static + ArrayType + Sync + Send + Sum, A: AllocatorType + Sync + Send + 'static>
|
||||||
|
RustStream<T> for StreamData<T, ArcDevice<T, A>, A>
|
||||||
|
{
|
||||||
|
fn init_arrays(&mut self) {
|
||||||
|
let init = self.init;
|
||||||
|
let pin = self.device.pin;
|
||||||
|
(0..self.device.ncore)
|
||||||
|
.map(&|t| {
|
||||||
|
let ref_a = self.device.ref_a(t);
|
||||||
|
let ref_b = self.device.ref_b(t);
|
||||||
|
let ref_c = self.device.ref_c(t);
|
||||||
|
let core = self.device.core_ids[t];
|
||||||
|
let n = self.device.chunk_size(self.size, t);
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(core);
|
||||||
|
}
|
||||||
|
ref_a.lock().unwrap().resize(n, init.0);
|
||||||
|
ref_b.lock().unwrap().resize(n, init.1);
|
||||||
|
ref_c.lock().unwrap().resize(n, init.2);
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.into_iter()
|
||||||
|
.for_each(|t| t.join().unwrap());
|
||||||
|
}
|
||||||
|
fn read_arrays(&mut self) {
|
||||||
|
let range = self.size;
|
||||||
|
let unlift = |drain: &mut Vec<T, A>, source: &Vec<Arc<Mutex<Vec<T, A>>>>| {
|
||||||
|
let xs =
|
||||||
|
source.into_iter().flat_map(|x| x.lock().unwrap().clone().into_iter()).collect::<Vec<_>>();
|
||||||
|
for i in 0..range {
|
||||||
|
drain[i] = xs[i];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
unlift(&mut self.a, &self.device.data.a_chunks);
|
||||||
|
unlift(&mut self.b, &self.device.data.b_chunks);
|
||||||
|
unlift(&mut self.c, &self.device.data.c_chunks);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn copy(&mut self) {
|
||||||
|
let pin = self.device.pin;
|
||||||
|
(0..self.device.ncore)
|
||||||
|
.map(move |t| {
|
||||||
|
let ref_a = self.device.ref_a(t);
|
||||||
|
let ref_c = self.device.ref_c(t);
|
||||||
|
let core = self.device.core_ids[t];
|
||||||
|
let n = self.device.chunk_size(self.size, t);
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(core);
|
||||||
|
}
|
||||||
|
let a = ref_a.lock().unwrap();
|
||||||
|
let mut c = ref_c.lock().unwrap();
|
||||||
|
for i in 0..n {
|
||||||
|
c[i] = a[i];
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.into_iter()
|
||||||
|
.for_each(|t| t.join().unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn mul(&mut self) {
|
||||||
|
let scalar = self.scalar;
|
||||||
|
let pin = self.device.pin;
|
||||||
|
(0..self.device.ncore)
|
||||||
|
.map(move |t| {
|
||||||
|
let ref_b = self.device.ref_b(t);
|
||||||
|
let ref_c = self.device.ref_c(t);
|
||||||
|
let core = self.device.core_ids[t];
|
||||||
|
let n = self.device.chunk_size(self.size, t);
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(core);
|
||||||
|
}
|
||||||
|
let mut b = ref_b.lock().unwrap();
|
||||||
|
let c = ref_c.lock().unwrap();
|
||||||
|
for i in 0..n {
|
||||||
|
b[i] = scalar * c[i];
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.into_iter()
|
||||||
|
.for_each(|t| t.join().unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add(&mut self) {
|
||||||
|
let pin = self.device.pin;
|
||||||
|
(0..self.device.ncore)
|
||||||
|
.map(&|t| {
|
||||||
|
let ref_a = self.device.ref_a(t);
|
||||||
|
let ref_b = self.device.ref_b(t);
|
||||||
|
let ref_c = self.device.ref_c(t);
|
||||||
|
let core = self.device.core_ids[t];
|
||||||
|
let n = self.device.chunk_size(self.size, t);
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(core);
|
||||||
|
}
|
||||||
|
let a = ref_a.lock().unwrap();
|
||||||
|
let b = ref_b.lock().unwrap();
|
||||||
|
let mut c = ref_c.lock().unwrap();
|
||||||
|
for i in 0..n {
|
||||||
|
c[i] = a[i] + b[i];
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.into_iter()
|
||||||
|
.for_each(|t| t.join().unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn triad(&mut self) {
|
||||||
|
let scalar = self.scalar;
|
||||||
|
let pin = self.device.pin;
|
||||||
|
(0..self.device.ncore)
|
||||||
|
.map(&|t| {
|
||||||
|
let ref_a = self.device.ref_a(t);
|
||||||
|
let ref_b = self.device.ref_b(t);
|
||||||
|
let ref_c = self.device.ref_c(t);
|
||||||
|
let core = self.device.core_ids[t];
|
||||||
|
let n = self.device.chunk_size(self.size, t);
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(core);
|
||||||
|
}
|
||||||
|
let mut a = ref_a.lock().unwrap();
|
||||||
|
let b = ref_b.lock().unwrap();
|
||||||
|
let c = ref_c.lock().unwrap();
|
||||||
|
for i in 0..n {
|
||||||
|
a[i] = b[i] + scalar * c[i]
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.into_iter()
|
||||||
|
.for_each(|t| t.join().unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn nstream(&mut self) {
|
||||||
|
let scalar = self.scalar;
|
||||||
|
let pin = self.device.pin;
|
||||||
|
(0..self.device.ncore)
|
||||||
|
.map(&|t| {
|
||||||
|
let ref_a = self.device.ref_a(t);
|
||||||
|
let ref_b = self.device.ref_b(t);
|
||||||
|
let ref_c = self.device.ref_c(t);
|
||||||
|
let core = self.device.core_ids[t];
|
||||||
|
let n = self.device.chunk_size(self.size, t);
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(core);
|
||||||
|
}
|
||||||
|
let mut a = ref_a.lock().unwrap();
|
||||||
|
let b = ref_b.lock().unwrap();
|
||||||
|
let c = ref_c.lock().unwrap();
|
||||||
|
for i in 0..n {
|
||||||
|
a[i] += b[i] + scalar * c[i]
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.into_iter()
|
||||||
|
.for_each(|t| t.join().unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dot(&mut self) -> T {
|
||||||
|
let pin = self.device.pin;
|
||||||
|
(0..self.device.ncore)
|
||||||
|
.map(&|t| {
|
||||||
|
let ref_a = self.device.ref_a(t);
|
||||||
|
let ref_b = self.device.ref_b(t);
|
||||||
|
let core = self.device.core_ids[t];
|
||||||
|
let n = self.device.chunk_size(self.size, t);
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(core);
|
||||||
|
}
|
||||||
|
let a = ref_a.lock().unwrap();
|
||||||
|
let b = ref_b.lock().unwrap();
|
||||||
|
let mut p = T::default();
|
||||||
|
for i in 0..n {
|
||||||
|
p += a[i] * b[i];
|
||||||
|
}
|
||||||
|
p
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.into_iter()
|
||||||
|
.map(|t| t.join().unwrap())
|
||||||
|
.sum()
|
||||||
|
}
|
||||||
|
}
|
||||||
221
src/rust/rust-stream/src/crossbeam_stream.rs
Normal file
221
src/rust/rust-stream/src/crossbeam_stream.rs
Normal file
@ -0,0 +1,221 @@
|
|||||||
|
use std::iter::Sum;
|
||||||
|
use std::slice::{Chunks, ChunksMut};
|
||||||
|
|
||||||
|
use crossbeam::thread;
|
||||||
|
|
||||||
|
use self::core_affinity::CoreId;
|
||||||
|
use crate::stream::{AllocatorType, ArrayType, RustStream, StreamData};
|
||||||
|
|
||||||
|
pub struct CrossbeamDevice {
|
||||||
|
pub(crate) ncore: usize,
|
||||||
|
pub(crate) pin: bool,
|
||||||
|
pub(crate) core_ids: Vec<CoreId>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CrossbeamDevice {
|
||||||
|
pub fn new(ncore: usize, pin: bool) -> Self {
|
||||||
|
let mut core_ids = match core_affinity::get_core_ids() {
|
||||||
|
Some(xs) => xs,
|
||||||
|
None => {
|
||||||
|
colour::e_red_ln!("Cannot enumerate cores, pinning will not work if enabled");
|
||||||
|
(0..ncore).map(|i| CoreId { id: i }).collect()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
core_ids.resize(ncore, core_ids[0]);
|
||||||
|
CrossbeamDevice { ncore, pin, core_ids }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CrossbeamDevice {
|
||||||
|
// divide the length by the number of cores, the last core gets less work if it does not divide
|
||||||
|
fn chunk_size(&self, len: usize) -> usize { (len as f64 / self.ncore as f64).ceil() as usize }
|
||||||
|
|
||||||
|
// make a mutable chunk from the vec
|
||||||
|
fn mk_mut_chunks<'a, T, A: AllocatorType>(&self, xs: &'a mut Vec<T, A>) -> ChunksMut<'a, T> {
|
||||||
|
let len = xs.len();
|
||||||
|
xs.chunks_mut(self.chunk_size(len))
|
||||||
|
}
|
||||||
|
|
||||||
|
// make a immutable chunk from the vec
|
||||||
|
fn mk_chunks<'a, T, A: AllocatorType>(&self, xs: &'a mut Vec<T, A>) -> Chunks<'a, T> {
|
||||||
|
xs.chunks(self.chunk_size(xs.len()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
extern crate core_affinity;
|
||||||
|
|
||||||
|
// Crossbeam threaded version, it should be semantically equal to the single threaded version
|
||||||
|
impl<T: ArrayType + Sync + Send + Sum, A: AllocatorType + Sync + Send> RustStream<T>
|
||||||
|
for StreamData<T, CrossbeamDevice, A>
|
||||||
|
{
|
||||||
|
fn init_arrays(&mut self) {
|
||||||
|
thread::scope(|s| {
|
||||||
|
let init = self.init;
|
||||||
|
let pin = self.device.pin;
|
||||||
|
for (t, ((a, b), c)) in self.device.core_ids.iter().zip(
|
||||||
|
self
|
||||||
|
.device
|
||||||
|
.mk_mut_chunks(&mut self.a)
|
||||||
|
.zip(self.device.mk_mut_chunks(&mut self.b))
|
||||||
|
.zip(self.device.mk_mut_chunks(&mut self.c)),
|
||||||
|
) {
|
||||||
|
s.spawn(move |_| {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(*t);
|
||||||
|
}
|
||||||
|
for x in a.into_iter() {
|
||||||
|
*x = init.0;
|
||||||
|
}
|
||||||
|
for x in b.into_iter() {
|
||||||
|
*x = init.1;
|
||||||
|
}
|
||||||
|
for x in c.into_iter() {
|
||||||
|
*x = init.2;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn copy(&mut self) {
|
||||||
|
thread::scope(|s| {
|
||||||
|
let pin = self.device.pin;
|
||||||
|
for (t, (c, a)) in self
|
||||||
|
.device
|
||||||
|
.core_ids
|
||||||
|
.iter()
|
||||||
|
.zip(self.device.mk_mut_chunks(&mut self.c).zip(self.device.mk_chunks(&mut self.a)))
|
||||||
|
{
|
||||||
|
s.spawn(move |_| {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(*t);
|
||||||
|
}
|
||||||
|
for i in 0..c.len() {
|
||||||
|
c[i] = a[i];
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn mul(&mut self) {
|
||||||
|
thread::scope(|s| {
|
||||||
|
let pin = self.device.pin;
|
||||||
|
let scalar = self.scalar;
|
||||||
|
for (t, (b, c)) in self
|
||||||
|
.device
|
||||||
|
.core_ids
|
||||||
|
.iter()
|
||||||
|
.zip(self.device.mk_mut_chunks(&mut self.b).zip(self.device.mk_chunks(&mut self.c)))
|
||||||
|
{
|
||||||
|
s.spawn(move |_| {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(*t);
|
||||||
|
}
|
||||||
|
for i in 0..b.len() {
|
||||||
|
b[i] = scalar * c[i];
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add(&mut self) {
|
||||||
|
thread::scope(|s| {
|
||||||
|
let pin = self.device.pin;
|
||||||
|
for (t, (c, (a, b))) in (&mut self.device.core_ids.iter()).zip(
|
||||||
|
self
|
||||||
|
.device
|
||||||
|
.mk_mut_chunks(&mut self.c)
|
||||||
|
.zip(self.device.mk_chunks(&mut self.a).zip(self.device.mk_chunks(&mut self.b))),
|
||||||
|
) {
|
||||||
|
s.spawn(move |_| {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(*t);
|
||||||
|
}
|
||||||
|
for i in 0..c.len() {
|
||||||
|
c[i] = a[i] + b[i];
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn triad(&mut self) {
|
||||||
|
thread::scope(|s| {
|
||||||
|
let pin = self.device.pin;
|
||||||
|
let scalar = self.scalar;
|
||||||
|
for (t, (a, (b, c))) in self.device.core_ids.iter().zip(
|
||||||
|
self
|
||||||
|
.device
|
||||||
|
.mk_mut_chunks(&mut self.a)
|
||||||
|
.zip(self.device.mk_chunks(&mut self.b).zip(self.device.mk_chunks(&mut self.c))),
|
||||||
|
) {
|
||||||
|
s.spawn(move |_| {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(*t);
|
||||||
|
}
|
||||||
|
for i in 0..a.len() {
|
||||||
|
a[i] = b[i] + scalar * c[i]
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn nstream(&mut self) {
|
||||||
|
thread::scope(|s| {
|
||||||
|
let pin = self.device.pin;
|
||||||
|
let scalar = self.scalar;
|
||||||
|
for (t, (a, (b, c))) in self.device.core_ids.iter().zip(
|
||||||
|
self
|
||||||
|
.device
|
||||||
|
.mk_mut_chunks(&mut self.a)
|
||||||
|
.zip(self.device.mk_chunks(&mut self.b).zip(self.device.mk_chunks(&mut self.c))),
|
||||||
|
) {
|
||||||
|
s.spawn(move |_| {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(*t);
|
||||||
|
}
|
||||||
|
for i in 0..a.len() {
|
||||||
|
a[i] += b[i] + scalar * c[i]
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dot(&mut self) -> T {
|
||||||
|
let mut partial_sum = vec![T::zero(); self.device.ncore];
|
||||||
|
thread::scope(|s| {
|
||||||
|
let pin = self.device.pin;
|
||||||
|
let a = &self.a;
|
||||||
|
let b = &self.b;
|
||||||
|
let chunk_indices = |i: usize| {
|
||||||
|
let chunk_size = self.device.chunk_size(self.size);
|
||||||
|
let start = i * chunk_size;
|
||||||
|
start..((start + chunk_size).min(self.size))
|
||||||
|
};
|
||||||
|
for (t, (n, acc)) in self.device.core_ids.iter().zip(partial_sum.iter_mut().enumerate()) {
|
||||||
|
s.spawn(move |_| {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(*t);
|
||||||
|
}
|
||||||
|
let mut p = T::zero();
|
||||||
|
for i in chunk_indices(n) {
|
||||||
|
p += a[i] * b[i];
|
||||||
|
}
|
||||||
|
*acc = p;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
partial_sum.into_iter().sum()
|
||||||
|
}
|
||||||
|
}
|
||||||
435
src/rust/rust-stream/src/lib.rs
Normal file
435
src/rust/rust-stream/src/lib.rs
Normal file
@ -0,0 +1,435 @@
|
|||||||
|
#![feature(allocator_api)]
|
||||||
|
#![feature(vec_into_raw_parts)]
|
||||||
|
|
||||||
|
use std::alloc::System;
|
||||||
|
use std::env;
|
||||||
|
use std::fmt::{Debug, Display};
|
||||||
|
use std::iter::Sum;
|
||||||
|
use std::mem::size_of;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use num_traits::abs;
|
||||||
|
use structopt::StructOpt;
|
||||||
|
use tabular::{Row, Table};
|
||||||
|
|
||||||
|
use crate::arc_stream::ArcDevice;
|
||||||
|
use crate::crossbeam_stream::CrossbeamDevice;
|
||||||
|
use crate::plain_stream::SerialDevice;
|
||||||
|
use crate::rayon_stream::RayonDevice;
|
||||||
|
use crate::stream::{AllocatorType, ArrayType, RustStream, StreamData};
|
||||||
|
use crate::unsafe_stream::UnsafeDevice;
|
||||||
|
|
||||||
|
mod arc_stream;
|
||||||
|
mod crossbeam_stream;
|
||||||
|
mod plain_stream;
|
||||||
|
mod rayon_stream;
|
||||||
|
mod stream;
|
||||||
|
mod unsafe_stream;
|
||||||
|
|
||||||
|
#[derive(Debug, StructOpt)]
|
||||||
|
struct Options {
|
||||||
|
/// List available devices
|
||||||
|
#[structopt(long)]
|
||||||
|
list: bool,
|
||||||
|
/// Select device at <device>
|
||||||
|
#[structopt(long, default_value = "0")]
|
||||||
|
device: usize,
|
||||||
|
/// Run the test <numtimes> times (NUM >= 2)
|
||||||
|
#[structopt(long, short = "n", default_value = "100")]
|
||||||
|
numtimes: usize,
|
||||||
|
/// Use <arraysize> elements in the array
|
||||||
|
#[structopt(long, short = "s", default_value = "33554432")]
|
||||||
|
arraysize: usize,
|
||||||
|
/// Use floats (rather than doubles)
|
||||||
|
#[structopt(long)]
|
||||||
|
float: bool,
|
||||||
|
/// Only run triad
|
||||||
|
#[structopt(long)]
|
||||||
|
triad_only: bool,
|
||||||
|
/// Only run nstream
|
||||||
|
#[structopt(long)]
|
||||||
|
nstream_only: bool,
|
||||||
|
/// Output as csv table
|
||||||
|
#[structopt(long)]
|
||||||
|
csv: bool,
|
||||||
|
/// Use MiB=2^20 for bandwidth calculation (default MB=10^6)
|
||||||
|
#[structopt(long)]
|
||||||
|
mibibytes: bool,
|
||||||
|
/// Use libc malloc instead of the Rust's allocator for benchmark array allocation
|
||||||
|
#[structopt(name = "malloc", long)]
|
||||||
|
malloc: bool,
|
||||||
|
/// Initialise each benchmark array at allocation time on the main thread
|
||||||
|
#[structopt(name = "init", long)]
|
||||||
|
init: bool,
|
||||||
|
/// Pin threads to distinct cores, this has NO effect in Rayon devices
|
||||||
|
#[structopt(long)]
|
||||||
|
pin: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Which set of kernels a run executes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Benchmark {
    /// Copy, mul, add, triad and dot.
    All,
    /// The triad kernel only.
    Triad,
    /// The nstream kernel only.
    NStream,
}
|
||||||
|
|
||||||
|
fn check_solution<T: ArrayType + Display + Sum + Into<f64>, D, A: AllocatorType>(
|
||||||
|
benchmark: Benchmark, numtimes: usize, vec: &StreamData<T, D, A>, dot_sum: Option<T>,
|
||||||
|
) -> bool {
|
||||||
|
let (mut gold_a, mut gold_b, mut gold_c) = vec.init;
|
||||||
|
for _ in 0..numtimes {
|
||||||
|
match benchmark {
|
||||||
|
Benchmark::All => {
|
||||||
|
gold_c = gold_a;
|
||||||
|
gold_b = vec.scalar * gold_c;
|
||||||
|
gold_c = gold_a + gold_b;
|
||||||
|
gold_a = gold_b + vec.scalar * gold_c;
|
||||||
|
}
|
||||||
|
Benchmark::Triad => {
|
||||||
|
gold_a = gold_b + vec.scalar * gold_c;
|
||||||
|
}
|
||||||
|
Benchmark::NStream => {
|
||||||
|
gold_a += gold_b + vec.scalar * gold_c;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
let tolerance = T::epsilon().into() * 100.0f64;
|
||||||
|
let validate_xs = |name: &str, xs: &Vec<T, A>, from: T| {
|
||||||
|
let error = (xs.iter().map(|x| abs(*x - from)).sum::<T>()).into() / xs.len() as f64;
|
||||||
|
let fail = error > tolerance;
|
||||||
|
if fail {
|
||||||
|
eprintln!("Validation failed on {}[]. Average error {} ", name, error);
|
||||||
|
}
|
||||||
|
!fail
|
||||||
|
};
|
||||||
|
let a_ok = validate_xs("a", &vec.a, gold_a);
|
||||||
|
let b_ok = validate_xs("b", &vec.b, gold_b);
|
||||||
|
let c_ok = validate_xs("c", &vec.c, gold_c);
|
||||||
|
let dot_ok = dot_sum.map_or(true, |sum| {
|
||||||
|
let gold_sum = (gold_a * gold_b).into() * vec.size as f64;
|
||||||
|
let error = abs((sum.into() - gold_sum) / gold_sum);
|
||||||
|
let fail = error > 1.0e-8;
|
||||||
|
if fail {
|
||||||
|
eprintln!(
|
||||||
|
"Validation failed on sum. Error {} \nSum was {} but should be {}",
|
||||||
|
error, sum, gold_sum
|
||||||
|
);
|
||||||
|
}
|
||||||
|
!fail
|
||||||
|
});
|
||||||
|
|
||||||
|
a_ok && b_ok && c_ok && dot_ok
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run_cpu<T: ArrayType + Sync + Send + Sum + Into<f64> + Display, D, A: AllocatorType>(
|
||||||
|
option: &Options, mut stream: StreamData<T, D, A>,
|
||||||
|
) -> bool
|
||||||
|
where StreamData<T, D, A>: RustStream<T> {
|
||||||
|
let benchmark = match (option.nstream_only, option.triad_only) {
|
||||||
|
(true, false) => Benchmark::NStream,
|
||||||
|
(false, true) => Benchmark::Triad,
|
||||||
|
(false, false) => Benchmark::All,
|
||||||
|
(true, true) => {
|
||||||
|
panic!("Both triad and nstream are enabled, pick one or omit both to run all benchmarks")
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let array_bytes = option.arraysize * size_of::<T>();
|
||||||
|
let total_bytes = array_bytes * 3;
|
||||||
|
let (mega_scale, mega_suffix, giga_scale, giga_suffix) = if !option.mibibytes {
|
||||||
|
(1.0e-6, "MB", 1.0e-9, "GB")
|
||||||
|
} else {
|
||||||
|
(2f64.powi(-20), "MiB", 2f64.powi(-30), "GiB")
|
||||||
|
};
|
||||||
|
|
||||||
|
if !option.csv {
|
||||||
|
println!(
|
||||||
|
"Running {} {} times",
|
||||||
|
match benchmark {
|
||||||
|
Benchmark::All => "kernels",
|
||||||
|
Benchmark::Triad => "triad",
|
||||||
|
Benchmark::NStream => "nstream",
|
||||||
|
},
|
||||||
|
option.numtimes
|
||||||
|
);
|
||||||
|
|
||||||
|
if benchmark == Benchmark::Triad {
|
||||||
|
println!("Number of elements: {}", option.arraysize);
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("Precision: {}", if option.float { "float" } else { "double" });
|
||||||
|
println!(
|
||||||
|
"Array size: {:.1} {} (={:.1} {})",
|
||||||
|
mega_scale * array_bytes as f64,
|
||||||
|
mega_suffix,
|
||||||
|
giga_scale * array_bytes as f64,
|
||||||
|
giga_suffix
|
||||||
|
);
|
||||||
|
println!(
|
||||||
|
"Total size: {:.1} {} (={:.1} {})",
|
||||||
|
mega_scale * total_bytes as f64,
|
||||||
|
mega_suffix,
|
||||||
|
giga_scale * total_bytes as f64,
|
||||||
|
giga_suffix
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
stream.init_arrays();
|
||||||
|
|
||||||
|
let tabulate = |xs: &Vec<Duration>, name: &str, t_size: usize| -> Vec<(&str, String)> {
|
||||||
|
let tail = &xs[1..]; // tail only
|
||||||
|
|
||||||
|
// do stats
|
||||||
|
let max = tail.iter().max().map(|d| d.as_secs_f64());
|
||||||
|
let min = tail.iter().min().map(|d| d.as_secs_f64());
|
||||||
|
match (min, max) {
|
||||||
|
(Some(min), Some(max)) => {
|
||||||
|
let avg: f64 = tail.iter().map(|d| d.as_secs_f64()).sum::<f64>() / tail.len() as f64;
|
||||||
|
let mbps = mega_scale * (t_size as f64) / min;
|
||||||
|
if option.csv {
|
||||||
|
vec![
|
||||||
|
("function", name.to_string()),
|
||||||
|
("num_times", option.numtimes.to_string()),
|
||||||
|
("n_elements", option.arraysize.to_string()),
|
||||||
|
("sizeof", t_size.to_string()),
|
||||||
|
(
|
||||||
|
if option.mibibytes { "max_mibytes_per_sec" } else { "max_mbytes_per_sec" },
|
||||||
|
mbps.to_string(),
|
||||||
|
),
|
||||||
|
("min_runtime", min.to_string()),
|
||||||
|
("max_runtime", max.to_string()),
|
||||||
|
("avg_runtime", avg.to_string()),
|
||||||
|
]
|
||||||
|
} else {
|
||||||
|
vec![
|
||||||
|
("Function", name.to_string()),
|
||||||
|
(if option.mibibytes { "MiBytes/sec" } else { "MBytes/sec" }, format!("{:.3}", mbps)),
|
||||||
|
("Min (sec)", format!("{:.5}", min)),
|
||||||
|
("Max", format!("{:.5}", max)),
|
||||||
|
("Average", format!("{:.5}", avg)),
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(_, _) => panic!("No min/max element for {}(size={})", name, t_size),
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let tabulate_all = |xs: Vec<Vec<(&str, String)>>| {
|
||||||
|
match xs.as_slice() {
|
||||||
|
[head, ..] => {
|
||||||
|
if option.csv {
|
||||||
|
println!("{}", head.iter().map(|(col, _)| *col).collect::<Vec<_>>().join(","));
|
||||||
|
for kvs in xs {
|
||||||
|
println!("{}", kvs.iter().map(|(_, val)| val.clone()).collect::<Vec<_>>().join(","));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let mut table = Table::new(&vec!["{:<}"; head.len()].join(" "));
|
||||||
|
table.add_row(head.iter().fold(Row::new(), |row, (col, _)| row.with_cell(col)));
|
||||||
|
for kvs in xs {
|
||||||
|
table.add_row(kvs.iter().fold(Row::new(), |row, (_, val)| row.with_cell(val)));
|
||||||
|
}
|
||||||
|
print!("{}", table);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => panic!("Empty tabulation"),
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
let solutions_correct = match benchmark {
|
||||||
|
Benchmark::All => {
|
||||||
|
let (results, sum) = stream.run_all(option.numtimes);
|
||||||
|
stream.read_arrays();
|
||||||
|
let correct = check_solution(benchmark, option.numtimes, &stream, Some(sum));
|
||||||
|
tabulate_all(vec![
|
||||||
|
tabulate(&results.copy, "Copy", 2 * array_bytes),
|
||||||
|
tabulate(&results.mul, "Mul", 2 * array_bytes),
|
||||||
|
tabulate(&results.add, "Add", 3 * array_bytes),
|
||||||
|
tabulate(&results.triad, "Triad", 3 * array_bytes),
|
||||||
|
tabulate(&results.dot, "Dot", 2 * array_bytes),
|
||||||
|
]);
|
||||||
|
correct
|
||||||
|
}
|
||||||
|
Benchmark::NStream => {
|
||||||
|
let results = stream.run_nstream(option.numtimes);
|
||||||
|
stream.read_arrays();
|
||||||
|
let correct = check_solution(benchmark, option.numtimes, &stream, None);
|
||||||
|
tabulate_all(vec![tabulate(&results, "Nstream", 4 * array_bytes)]);
|
||||||
|
correct
|
||||||
|
}
|
||||||
|
Benchmark::Triad => {
|
||||||
|
let results = stream.run_triad(option.numtimes);
|
||||||
|
stream.read_arrays();
|
||||||
|
let correct = check_solution(benchmark, option.numtimes, &stream, None);
|
||||||
|
let total_bytes = 3 * array_bytes * option.numtimes;
|
||||||
|
let bandwidth = giga_scale * (total_bytes as f64 / results.as_secs_f64());
|
||||||
|
println!("Runtime (seconds): {:.5}", results.as_secs_f64());
|
||||||
|
println!("Bandwidth ({}/s): {:.3} ", giga_suffix, bandwidth);
|
||||||
|
correct
|
||||||
|
}
|
||||||
|
};
|
||||||
|
stream.clean_up();
|
||||||
|
solutions_correct
|
||||||
|
}
|
||||||
|
|
||||||
|
// Crate version as seen by Cargo at build time, if any.
const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION");

// Initial values of the three arrays and of the scalar, written as f32 literals.
static START_A: f32 = 0.1;
static START_B: f32 = 0.2;
static START_C: f32 = 0.0;
static START_SCALAR: f32 = 0.4;

// Single precision initial state.
static FLOAT_INIT_SCALAR: f32 = START_SCALAR;
static FLOAT_INIT: (f32, f32, f32) = (START_A, START_B, START_C);

// Double precision initial state. The values are widened from the f32 constants,
// so they carry the f32 rounding of the literals above.
static DOUBLE_INIT_SCALAR: f64 = START_SCALAR as f64;
static DOUBLE_INIT: (f64, f64, f64) = (START_A as f64, START_B as f64, START_C as f64);
|
||||||
|
|
||||||
|
pub fn run(args: &Vec<String>) -> bool {
|
||||||
|
let opt: Options = Options::from_iter(args);
|
||||||
|
|
||||||
|
if opt.numtimes < 2 {
|
||||||
|
panic!("numtimes must be >= 2")
|
||||||
|
}
|
||||||
|
|
||||||
|
let alloc = System;
|
||||||
|
let alloc_name = if opt.malloc { "libc-malloc" } else { "rust-system" };
|
||||||
|
|
||||||
|
fn mk_data<T: ArrayType, D, A: AllocatorType>(
|
||||||
|
opt: &Options, init: (T, T, T), scalar: T, dev: D, alloc: A,
|
||||||
|
) -> StreamData<T, D, A> {
|
||||||
|
StreamData::new_in(opt.arraysize, scalar, init, dev, alloc, opt.malloc, opt.init)
|
||||||
|
}
|
||||||
|
|
||||||
|
let num_thread_key = "BABELSTREAM_NUM_THREADS";
|
||||||
|
let max_ncores = num_cpus::get();
|
||||||
|
let ncores = match env::var(num_thread_key) {
|
||||||
|
Ok(v) => match v.parse::<i64>() {
|
||||||
|
Err(bad) => {
|
||||||
|
colour::e_yellow_ln!(
|
||||||
|
"Cannot parse {} (reason: {}), defaulting to {}",
|
||||||
|
bad,
|
||||||
|
num_thread_key,
|
||||||
|
max_ncores
|
||||||
|
);
|
||||||
|
max_ncores
|
||||||
|
}
|
||||||
|
Ok(n) if n <= 0 || n > max_ncores as i64 => {
|
||||||
|
println!("{} out of bound ({}), defaulting to {}", num_thread_key, n, max_ncores);
|
||||||
|
max_ncores
|
||||||
|
}
|
||||||
|
Ok(n) => n as usize,
|
||||||
|
},
|
||||||
|
Err(_) => {
|
||||||
|
println!("{} not set, defaulting to max ({})", num_thread_key, max_ncores);
|
||||||
|
max_ncores
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let rayon_device = &|| {
|
||||||
|
let rayon_num_thread_key = "RAYON_NUM_THREADS";
|
||||||
|
if env::var(rayon_num_thread_key).is_ok() {
|
||||||
|
colour::e_yellow_ln!("{} is ignored, set {} instead", rayon_num_thread_key, num_thread_key)
|
||||||
|
}
|
||||||
|
let dev = RayonDevice {
|
||||||
|
pool: rayon::ThreadPoolBuilder::default().num_threads(ncores).build().unwrap(),
|
||||||
|
};
|
||||||
|
if !opt.csv {
|
||||||
|
println!("Using {} thread(s), alloc={}", dev.pool.current_num_threads(), alloc_name);
|
||||||
|
if opt.pin {
|
||||||
|
colour::e_yellow_ln!("Pinning threads have no effect on Rayon!")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if opt.float {
|
||||||
|
run_cpu(&opt, mk_data(&opt, FLOAT_INIT, FLOAT_INIT_SCALAR, dev, alloc))
|
||||||
|
} else {
|
||||||
|
run_cpu(&opt, mk_data(&opt, DOUBLE_INIT, DOUBLE_INIT_SCALAR, dev, alloc))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let arc_device = &|| {
|
||||||
|
if !opt.csv {
|
||||||
|
println!("Using {} thread, pin={}, alloc={}", ncores, opt.pin, alloc_name);
|
||||||
|
}
|
||||||
|
if opt.float {
|
||||||
|
let dev = ArcDevice::<f32, _>::new(ncores, opt.pin, alloc);
|
||||||
|
run_cpu(&opt, mk_data(&opt, FLOAT_INIT, FLOAT_INIT_SCALAR, dev, alloc))
|
||||||
|
} else {
|
||||||
|
let dev = ArcDevice::<f64, _>::new(ncores, opt.pin, alloc);
|
||||||
|
run_cpu(&opt, mk_data(&opt, DOUBLE_INIT, DOUBLE_INIT_SCALAR, dev, alloc))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let unsafe_device = &|| {
|
||||||
|
if !opt.csv {
|
||||||
|
println!("Using {} thread, pin={}, alloc={}", ncores, opt.pin, alloc_name);
|
||||||
|
}
|
||||||
|
if opt.float {
|
||||||
|
let dev = UnsafeDevice::<f32>::new(ncores, opt.pin);
|
||||||
|
run_cpu(&opt, mk_data(&opt, FLOAT_INIT, FLOAT_INIT_SCALAR, dev, alloc))
|
||||||
|
} else {
|
||||||
|
let dev = UnsafeDevice::<f64>::new(ncores, opt.pin);
|
||||||
|
run_cpu(&opt, mk_data(&opt, DOUBLE_INIT, DOUBLE_INIT_SCALAR, dev, alloc))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let crossbeam_device = &|| {
|
||||||
|
let dev = CrossbeamDevice::new(ncores, opt.pin);
|
||||||
|
if !opt.csv {
|
||||||
|
println!("Using {} thread(s), pin={}, alloc={}", ncores, opt.pin, alloc_name)
|
||||||
|
}
|
||||||
|
if opt.float {
|
||||||
|
run_cpu(&opt, mk_data(&opt, FLOAT_INIT, FLOAT_INIT_SCALAR, dev, alloc))
|
||||||
|
} else {
|
||||||
|
run_cpu(&opt, mk_data(&opt, DOUBLE_INIT, DOUBLE_INIT_SCALAR, dev, alloc))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let st_device = &|| {
|
||||||
|
let dev = SerialDevice { pin: opt.pin };
|
||||||
|
if !opt.csv {
|
||||||
|
println!("Using 1 thread, pin={}, alloc={}", opt.pin, alloc_name);
|
||||||
|
}
|
||||||
|
if opt.float {
|
||||||
|
run_cpu(&opt, mk_data(&opt, FLOAT_INIT, FLOAT_INIT_SCALAR, dev, alloc))
|
||||||
|
} else {
|
||||||
|
run_cpu(&opt, mk_data(&opt, DOUBLE_INIT, DOUBLE_INIT_SCALAR, dev, alloc))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let devices: Vec<(String, &'_ dyn Fn() -> bool)> = vec![
|
||||||
|
("CPU (Single threaded)".to_string(), st_device),
|
||||||
|
("CPU (Rayon)".to_string(), rayon_device),
|
||||||
|
(format!("CPU (Arc, pinning={})", opt.pin), arc_device),
|
||||||
|
(format!("CPU (Unsafe, pinning={})", opt.pin), unsafe_device),
|
||||||
|
(format!("CPU (Crossbeam, pinning={})", opt.pin), crossbeam_device),
|
||||||
|
];
|
||||||
|
|
||||||
|
if opt.list {
|
||||||
|
devices.iter().enumerate().for_each(|(i, (name, _))| {
|
||||||
|
println!("[{}] {}", i, name);
|
||||||
|
});
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
match devices.get(opt.device) {
|
||||||
|
Some((name, run)) => {
|
||||||
|
if !&opt.csv {
|
||||||
|
println!(
|
||||||
|
"BabelStream\n\
|
||||||
|
Version: {}\n\
|
||||||
|
Implementation: Rust; {}",
|
||||||
|
VERSION.unwrap_or("unknown"),
|
||||||
|
name
|
||||||
|
);
|
||||||
|
if opt.init {
|
||||||
|
println!("Initialising arrays on main thread");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
run()
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
eprintln!("Device index {} not available", opt.device);
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
5
src/rust/rust-stream/src/main.rs
Normal file
5
src/rust/rust-stream/src/main.rs
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
fn main() {
|
||||||
|
if !rust_stream::run(&std::env::args().collect::<Vec<_>>()) {
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
61
src/rust/rust-stream/src/plain_stream.rs
Normal file
61
src/rust/rust-stream/src/plain_stream.rs
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
use crate::stream::{AllocatorType, ArrayType, RustStream, StreamData};
|
||||||
|
use core_affinity::CoreId;
|
||||||
|
|
||||||
|
/// Device marker for the single-threaded implementation.
pub struct SerialDevice {
    /// When set, `init_arrays` pins the calling thread to a core before touching the arrays.
    pub(crate) pin: bool,
}
|
||||||
|
|
||||||
|
// single threaded version
|
||||||
|
impl<T: ArrayType, A: AllocatorType> RustStream<T> for StreamData<T, SerialDevice, A> {
|
||||||
|
fn init_arrays(&mut self) {
|
||||||
|
if self.device.pin {
|
||||||
|
core_affinity::set_for_current(
|
||||||
|
match core_affinity::get_core_ids().as_ref().map(|x| x.first()) {
|
||||||
|
Some(Some(x)) => *x,
|
||||||
|
_ => CoreId { id: 0 },
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
self.a.fill(self.init.0);
|
||||||
|
self.b.fill(self.init.1);
|
||||||
|
self.c.fill(self.init.2);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn copy(&mut self) {
|
||||||
|
for i in 0..self.size {
|
||||||
|
self.c[i] = self.a[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn mul(&mut self) {
|
||||||
|
for i in 0..self.size {
|
||||||
|
self.b[i] = self.scalar * self.c[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add(&mut self) {
|
||||||
|
for i in 0..self.size {
|
||||||
|
self.c[i] = self.a[i] + self.b[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn triad(&mut self) {
|
||||||
|
for i in 0..self.size {
|
||||||
|
self.a[i] = self.b[i] + self.scalar * self.c[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn nstream(&mut self) {
|
||||||
|
for i in 0..self.size {
|
||||||
|
self.a[i] += self.b[i] + self.scalar * self.c[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dot(&mut self) -> T {
|
||||||
|
let mut sum = T::default();
|
||||||
|
for i in 0..self.size {
|
||||||
|
sum += self.a[i] * self.b[i];
|
||||||
|
}
|
||||||
|
sum
|
||||||
|
}
|
||||||
|
}
|
||||||
77
src/rust/rust-stream/src/rayon_stream.rs
Normal file
77
src/rust/rust-stream/src/rayon_stream.rs
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
use std::iter::Sum;
|
||||||
|
|
||||||
|
use rayon::prelude::*;
|
||||||
|
use rayon::ThreadPool;
|
||||||
|
|
||||||
|
use crate::stream::{AllocatorType, ArrayType, RustStream, StreamData};
|
||||||
|
|
||||||
|
pub struct RayonDevice {
|
||||||
|
pub(crate) pool: ThreadPool,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rayon version, it should be semantically equal to the single threaded version
|
||||||
|
impl<T: ArrayType + Sync + Send + Sum, A: AllocatorType + Sync + Send> RustStream<T>
|
||||||
|
for StreamData<T, RayonDevice, A>
|
||||||
|
{
|
||||||
|
fn init_arrays(&mut self) {
|
||||||
|
let init = self.init;
|
||||||
|
self.a.par_iter_mut().for_each(|v| *v = init.0);
|
||||||
|
self.b.par_iter_mut().for_each(|v| *v = init.1);
|
||||||
|
self.c.par_iter_mut().for_each(|v| *v = init.2);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn copy(&mut self) {
|
||||||
|
let a = &self.a;
|
||||||
|
let c = &mut self.c;
|
||||||
|
self.device.pool.install(|| {
|
||||||
|
(*c).par_iter_mut().enumerate().for_each(|(i, c)| *c = a[i]);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn mul(&mut self) {
|
||||||
|
let scalar = self.scalar;
|
||||||
|
let c = &self.c;
|
||||||
|
let b = &mut self.b;
|
||||||
|
self
|
||||||
|
.device
|
||||||
|
.pool
|
||||||
|
.install(|| (*b).par_iter_mut().enumerate().for_each(|(i, b)| *b = scalar * c[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add(&mut self) {
|
||||||
|
let a = &self.a;
|
||||||
|
let b = &self.b;
|
||||||
|
let c = &mut self.c;
|
||||||
|
self.device.pool.install(|| (*c).par_iter_mut().enumerate().for_each(|(i, c)| *c = a[i] + b[i]))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn triad(&mut self) {
|
||||||
|
let scalar = self.scalar;
|
||||||
|
let a = &mut self.a;
|
||||||
|
let b = &self.b;
|
||||||
|
let c = &self.c;
|
||||||
|
self
|
||||||
|
.device
|
||||||
|
.pool
|
||||||
|
.install(|| (*a).par_iter_mut().enumerate().for_each(|(i, a)| *a = b[i] + scalar * c[i]))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn nstream(&mut self) {
|
||||||
|
let scalar = self.scalar;
|
||||||
|
let a = &mut self.a;
|
||||||
|
let b = &self.b;
|
||||||
|
let c = &self.c;
|
||||||
|
self
|
||||||
|
.device
|
||||||
|
.pool
|
||||||
|
.install(|| (*a).par_iter_mut().enumerate().for_each(|(i, a)| *a += b[i] + scalar * c[i]))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dot(&mut self) -> T {
|
||||||
|
let a = &self.a;
|
||||||
|
let b = &self.b;
|
||||||
|
self.device.pool.install(|| {
|
||||||
|
(0..self.size).into_par_iter().fold(|| T::default(), |acc, i| acc + a[i] * b[i]).sum::<T>()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
167
src/rust/rust-stream/src/stream.rs
Normal file
167
src/rust/rust-stream/src/stream.rs
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
use num_traits::real::Real;
|
||||||
|
use num_traits::{NumAssign, Signed};
|
||||||
|
use std::alloc::Allocator;
|
||||||
|
use std::fmt::Debug;
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
|
pub trait AllocatorType: Allocator + Copy + Clone + Default + Debug {}
|
||||||
|
impl<T: Allocator + Copy + Clone + Default + Debug> AllocatorType for T {}
|
||||||
|
|
||||||
|
pub struct StreamData<T, D, A: AllocatorType> {
|
||||||
|
pub device: D,
|
||||||
|
pub size: usize,
|
||||||
|
pub scalar: T,
|
||||||
|
pub init: (T, T, T),
|
||||||
|
pub a: Vec<T, A>,
|
||||||
|
pub b: Vec<T, A>,
|
||||||
|
pub c: Vec<T, A>,
|
||||||
|
pub needs_dealloc: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Runs `f` exactly once and returns how long the call took.
#[inline(always)]
fn timed<F>(f: F) -> Duration
where
    F: FnOnce(),
{
    let begin = Instant::now();
    f();
    begin.elapsed()
}
|
||||||
|
|
||||||
|
/// Calls `f` once and returns the elapsed time together with the value `f` produced.
#[inline(always)]
fn timed_mut<T, F>(f: &mut F) -> (Duration, T)
where
    F: FnMut() -> T,
{
    let begin = Instant::now();
    let value = f();
    let elapsed = begin.elapsed();
    (elapsed, value)
}
|
||||||
|
|
||||||
|
/// One value of type `T` per kernel measured by `run_all`.
pub struct AllTiming<T> {
    /// Measurement for the `copy` kernel.
    pub copy: T,
    /// Measurement for the `mul` kernel.
    pub mul: T,
    /// Measurement for the `add` kernel.
    pub add: T,
    /// Measurement for the `triad` kernel.
    pub triad: T,
    /// Measurement for the `dot` kernel.
    pub dot: T,
}
|
||||||
|
|
||||||
|
pub trait ArrayType: Real + NumAssign + Signed + Default + Debug {}
|
||||||
|
impl<T: Real + NumAssign + Signed + Default + Debug> ArrayType for T {}
|
||||||
|
|
||||||
|
impl<T: Default + Clone, D, A: AllocatorType> StreamData<T, D, A> {
|
||||||
|
pub fn new_in(
|
||||||
|
size: usize,
|
||||||
|
scalar: T,
|
||||||
|
init: (T, T, T),
|
||||||
|
device: D,
|
||||||
|
allocator: A,
|
||||||
|
malloc: bool, //
|
||||||
|
initialise: bool, //
|
||||||
|
) -> StreamData<T, D, A> {
|
||||||
|
let mk_vec = || {
|
||||||
|
if malloc {
|
||||||
|
extern crate libc;
|
||||||
|
use std::mem;
|
||||||
|
unsafe {
|
||||||
|
// we do the typical C malloc with a NULL check here
|
||||||
|
let bytes = mem::size_of::<T>() * size;
|
||||||
|
let ptr = libc::malloc(bytes as libc::size_t) as *mut T;
|
||||||
|
if ptr.is_null() {
|
||||||
|
panic!(
|
||||||
|
"Cannot allocate {} bytes in `sizeof(T) * size` (T = {}, size = {})",
|
||||||
|
bytes,
|
||||||
|
mem::size_of::<T>(),
|
||||||
|
size
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let mut xs = Vec::from_raw_parts_in(ptr, size, size, allocator);
|
||||||
|
if initialise {
|
||||||
|
xs.fill(T::default());
|
||||||
|
}
|
||||||
|
xs
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if initialise {
|
||||||
|
let mut xs = Vec::new_in(allocator);
|
||||||
|
xs.resize(size, T::default());
|
||||||
|
xs
|
||||||
|
} else {
|
||||||
|
// try not to touch the vec after allocation
|
||||||
|
let mut xs = Vec::with_capacity_in(size, allocator);
|
||||||
|
unsafe {
|
||||||
|
xs.set_len(size);
|
||||||
|
}
|
||||||
|
xs
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
StreamData {
|
||||||
|
device,
|
||||||
|
size,
|
||||||
|
scalar,
|
||||||
|
init,
|
||||||
|
a: mk_vec(),
|
||||||
|
b: mk_vec(),
|
||||||
|
c: mk_vec(),
|
||||||
|
needs_dealloc: malloc,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn clean_up(self) {
|
||||||
|
if self.needs_dealloc {
|
||||||
|
unsafe {
|
||||||
|
extern crate libc;
|
||||||
|
let free_ts = move |xs: Vec<T, A>| {
|
||||||
|
// make sure we don't call dealloc for vec anymore
|
||||||
|
// XXX it's important we don't free xs.as_mut_ptr() here and use xs.into_raw_parts_with_alloc()
|
||||||
|
// as that function handles drops semantic for us
|
||||||
|
// if we free the the raw ptr directly, the compiler will still drop the vec and then segfault
|
||||||
|
let (ptr, _, _, _) = xs.into_raw_parts_with_alloc();
|
||||||
|
libc::free(ptr as *mut libc::c_void);
|
||||||
|
};
|
||||||
|
free_ts(self.a);
|
||||||
|
free_ts(self.b);
|
||||||
|
free_ts(self.c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait RustStream<T: Default> {
|
||||||
|
fn init_arrays(&mut self);
|
||||||
|
fn read_arrays(&mut self) {} // default to no-op as most impl. doesn't need this
|
||||||
|
fn copy(&mut self);
|
||||||
|
fn mul(&mut self);
|
||||||
|
fn add(&mut self);
|
||||||
|
fn triad(&mut self);
|
||||||
|
fn nstream(&mut self);
|
||||||
|
fn dot(&mut self) -> T;
|
||||||
|
|
||||||
|
fn run_all(&mut self, n: usize) -> (AllTiming<Vec<Duration>>, T) {
|
||||||
|
let mut timings: AllTiming<Vec<Duration>> = AllTiming {
|
||||||
|
copy: vec![Duration::default(); n],
|
||||||
|
mul: vec![Duration::default(); n],
|
||||||
|
add: vec![Duration::default(); n],
|
||||||
|
triad: vec![Duration::default(); n],
|
||||||
|
dot: vec![Duration::default(); n],
|
||||||
|
};
|
||||||
|
let mut last_sum = T::default();
|
||||||
|
for i in 0..n {
|
||||||
|
timings.copy[i] = timed(|| self.copy());
|
||||||
|
timings.mul[i] = timed(|| self.mul());
|
||||||
|
timings.add[i] = timed(|| self.add());
|
||||||
|
timings.triad[i] = timed(|| self.triad());
|
||||||
|
let (dot, sum) = timed_mut(&mut || self.dot());
|
||||||
|
timings.dot[i] = dot;
|
||||||
|
last_sum = sum;
|
||||||
|
}
|
||||||
|
(timings, last_sum)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run_triad(&mut self, n: usize) -> Duration {
|
||||||
|
timed(|| {
|
||||||
|
for _ in 0..n {
|
||||||
|
self.triad();
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run_nstream(&mut self, n: usize) -> Vec<Duration> {
|
||||||
|
(0..n).map(|_| timed(|| self.nstream())).collect::<Vec<_>>()
|
||||||
|
}
|
||||||
|
}
|
||||||
266
src/rust/rust-stream/src/unsafe_stream.rs
Normal file
266
src/rust/rust-stream/src/unsafe_stream.rs
Normal file
@ -0,0 +1,266 @@
|
|||||||
|
extern crate core_affinity;
|
||||||
|
|
||||||
|
use std::alloc::Allocator;
|
||||||
|
use std::iter::Sum;
|
||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
|
use crate::stream::{AllocatorType, ArrayType, RustStream, StreamData};
|
||||||
|
|
||||||
|
use self::core_affinity::CoreId;
|
||||||
|
|
||||||
|
/// Raw, lifetime-free view of an array: base pointer (`.0`) and element count (`.1`).
#[derive(Debug, Copy, Clone)]
struct UnsafeData<T>(*mut T, usize);
|
||||||
|
|
||||||
|
impl<T: ArrayType> UnsafeData<T> {
|
||||||
|
fn empty() -> UnsafeData<T> { UnsafeData(([] as [T; 0]).as_mut_ptr(), 0) }
|
||||||
|
fn new<A: Allocator>(xs: &mut Vec<T, A>) -> UnsafeData<T> {
|
||||||
|
UnsafeData(xs.as_mut_ptr(), xs.len())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_slice(&self) -> &mut [T] { unsafe { std::slice::from_raw_parts_mut(self.0, self.1) } }
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe impl<T> Send for UnsafeData<T> {}
|
||||||
|
unsafe impl<T> Sync for UnsafeData<T> {}
|
||||||
|
|
||||||
|
#[derive(Debug, Copy, Clone)]
|
||||||
|
struct UnsafeRefs<T> {
|
||||||
|
a: UnsafeData<T>,
|
||||||
|
b: UnsafeData<T>,
|
||||||
|
c: UnsafeData<T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe impl<T> Send for UnsafeRefs<T> {}
|
||||||
|
unsafe impl<T> Sync for UnsafeRefs<T> {}
|
||||||
|
|
||||||
|
pub struct UnsafeDevice<T: ArrayType> {
|
||||||
|
pub(crate) ncore: usize,
|
||||||
|
pub(crate) pin: bool,
|
||||||
|
pub(crate) core_ids: Vec<CoreId>,
|
||||||
|
data: UnsafeRefs<T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: ArrayType> UnsafeDevice<T> {
|
||||||
|
pub fn new(ncore: usize, pin: bool) -> Self {
|
||||||
|
let mut core_ids = match core_affinity::get_core_ids() {
|
||||||
|
Some(xs) => xs,
|
||||||
|
None => {
|
||||||
|
colour::e_red_ln!("Cannot enumerate cores, pinning will not work if enabled");
|
||||||
|
(0..ncore).map(|i| CoreId { id: i }).collect()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
core_ids.resize(ncore, core_ids[0]);
|
||||||
|
|
||||||
|
UnsafeDevice {
|
||||||
|
ncore,
|
||||||
|
pin,
|
||||||
|
core_ids,
|
||||||
|
data: UnsafeRefs { a: UnsafeData::empty(), b: UnsafeData::empty(), c: UnsafeData::empty() },
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn thread_ranges(&self, len: usize) -> Vec<(usize, Range<usize>)> {
|
||||||
|
let chunk = (len as f64 / self.ncore as f64).ceil() as usize;
|
||||||
|
(0..self.ncore)
|
||||||
|
.map(|t| {
|
||||||
|
(t, if t == self.ncore - 1 { (t * chunk)..len } else { (t * chunk)..((t + 1) * chunk) })
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unsafe threaded version, it should be semantically equal to the single threaded version
|
||||||
|
impl<T: 'static + ArrayType + Sync + Send + Sum, A: AllocatorType + Sync + Send> RustStream<T>
|
||||||
|
for StreamData<T, UnsafeDevice<T>, A>
|
||||||
|
{
|
||||||
|
fn init_arrays(&mut self) {
|
||||||
|
self.device.data.a = UnsafeData::new(&mut self.a);
|
||||||
|
self.device.data.b = UnsafeData::new(&mut self.b);
|
||||||
|
self.device.data.c = UnsafeData::new(&mut self.c);
|
||||||
|
let init = self.init;
|
||||||
|
let pin = self.device.pin;
|
||||||
|
let data = self.device.data;
|
||||||
|
self
|
||||||
|
.device
|
||||||
|
.thread_ranges(self.size)
|
||||||
|
.into_iter()
|
||||||
|
.map(|(t, r)| {
|
||||||
|
let core = self.device.core_ids[t];
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(core);
|
||||||
|
}
|
||||||
|
let a = data.a.get_slice();
|
||||||
|
let b = data.b.get_slice();
|
||||||
|
let c = data.c.get_slice();
|
||||||
|
for i in r {
|
||||||
|
a[i] = init.0;
|
||||||
|
b[i] = init.1;
|
||||||
|
c[i] = init.2;
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.into_iter()
|
||||||
|
.for_each(|t| t.join().unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn copy(&mut self) {
|
||||||
|
let pin = self.device.pin;
|
||||||
|
let data = self.device.data;
|
||||||
|
self
|
||||||
|
.device
|
||||||
|
.thread_ranges(self.size)
|
||||||
|
.into_iter()
|
||||||
|
.map(|(t, r)| {
|
||||||
|
let core = self.device.core_ids[t];
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(core);
|
||||||
|
}
|
||||||
|
let a = data.a.get_slice();
|
||||||
|
let c = data.c.get_slice();
|
||||||
|
for i in r {
|
||||||
|
c[i] = a[i];
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.into_iter()
|
||||||
|
.for_each(|t| t.join().unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn mul(&mut self) {
|
||||||
|
let scalar = self.scalar;
|
||||||
|
let pin = self.device.pin;
|
||||||
|
let data = self.device.data;
|
||||||
|
self
|
||||||
|
.device
|
||||||
|
.thread_ranges(self.size)
|
||||||
|
.into_iter()
|
||||||
|
.map(|(t, r)| {
|
||||||
|
let core = self.device.core_ids[t];
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(core);
|
||||||
|
}
|
||||||
|
let b = data.b.get_slice();
|
||||||
|
let c = data.c.get_slice();
|
||||||
|
for i in r {
|
||||||
|
b[i] = scalar * c[i];
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.into_iter()
|
||||||
|
.for_each(|t| t.join().unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add(&mut self) {
|
||||||
|
let pin = self.device.pin;
|
||||||
|
let data = self.device.data;
|
||||||
|
self
|
||||||
|
.device
|
||||||
|
.thread_ranges(self.size)
|
||||||
|
.into_iter()
|
||||||
|
.map(|(t, r)| {
|
||||||
|
let core = self.device.core_ids[t];
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(core);
|
||||||
|
}
|
||||||
|
let a = data.a.get_slice();
|
||||||
|
let b = data.b.get_slice();
|
||||||
|
let c = data.c.get_slice();
|
||||||
|
for i in r {
|
||||||
|
c[i] = a[i] + b[i];
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.into_iter()
|
||||||
|
.for_each(|t| t.join().unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn triad(&mut self) {
|
||||||
|
let scalar = self.scalar;
|
||||||
|
let pin = self.device.pin;
|
||||||
|
let data = self.device.data;
|
||||||
|
self
|
||||||
|
.device
|
||||||
|
.thread_ranges(self.size)
|
||||||
|
.into_iter()
|
||||||
|
.map(|(t, r)| {
|
||||||
|
let core = self.device.core_ids[t];
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(core);
|
||||||
|
}
|
||||||
|
let a = data.a.get_slice();
|
||||||
|
let b = data.b.get_slice();
|
||||||
|
let c = data.c.get_slice();
|
||||||
|
for i in r {
|
||||||
|
a[i] = b[i] + scalar * c[i]
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.into_iter()
|
||||||
|
.for_each(|t| t.join().unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn nstream(&mut self) {
|
||||||
|
let scalar = self.scalar;
|
||||||
|
let pin = self.device.pin;
|
||||||
|
let data = self.device.data;
|
||||||
|
self
|
||||||
|
.device
|
||||||
|
.thread_ranges(self.size)
|
||||||
|
.into_iter()
|
||||||
|
.map(|(t, r)| {
|
||||||
|
let core = self.device.core_ids[t];
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(core);
|
||||||
|
}
|
||||||
|
let a = data.a.get_slice();
|
||||||
|
let b = data.b.get_slice();
|
||||||
|
let c = data.c.get_slice();
|
||||||
|
for i in r {
|
||||||
|
a[i] += b[i] + scalar * c[i]
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.into_iter()
|
||||||
|
.for_each(|t| t.join().unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dot(&mut self) -> T {
|
||||||
|
let pin = self.device.pin;
|
||||||
|
let data = self.device.data;
|
||||||
|
self
|
||||||
|
.device
|
||||||
|
.thread_ranges(self.size)
|
||||||
|
.into_iter()
|
||||||
|
.map(|(t, r)| {
|
||||||
|
let core = self.device.core_ids[t];
|
||||||
|
std::thread::spawn(move || {
|
||||||
|
if pin {
|
||||||
|
core_affinity::set_for_current(core);
|
||||||
|
}
|
||||||
|
let a = data.a.get_slice();
|
||||||
|
let b = data.b.get_slice();
|
||||||
|
let mut p = T::default();
|
||||||
|
for i in r {
|
||||||
|
p += a[i] * b[i];
|
||||||
|
}
|
||||||
|
p
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.into_iter()
|
||||||
|
.map(|t| t.join().unwrap())
|
||||||
|
.sum()
|
||||||
|
}
|
||||||
|
}
|
||||||
17
src/rust/rust-stream/tests/integration_test.rs
Normal file
17
src/rust/rust-stream/tests/integration_test.rs
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
use rstest::rstest;
|
||||||
|
|
||||||
|
#[rstest]
|
||||||
|
fn test_main(
|
||||||
|
#[values(0, 1, 2, 3, 4)] device: usize, //
|
||||||
|
#[values("", "--pin")] pin: &str, //
|
||||||
|
#[values("", "--malloc")] malloc: &str, //
|
||||||
|
#[values("", "--init")] init: &str, //
|
||||||
|
#[values("", "--triad-only", "--nstream-only")] option: &str, //
|
||||||
|
) {
|
||||||
|
let line = format!(
|
||||||
|
"rust-stream --arraysize 2048 --device {} {} {} {} {}",
|
||||||
|
device, pin, malloc, init, option
|
||||||
|
);
|
||||||
|
let args = line.split_whitespace().map(|s| s.to_string()).collect::<Vec<_>>();
|
||||||
|
assert!(rust_stream::run(&args));
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue
Block a user