Merge branch 'main' into java

This commit is contained in:
Tom Lin 2021-11-25 13:14:06 +00:00 committed by GitHub
commit 3d2f55da4a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
51 changed files with 5189 additions and 3 deletions

View File

@ -3,6 +3,7 @@ on: [push, pull_request]
jobs:
test-java:
runs-on: ubuntu-18.04
defaults:
@ -15,6 +16,36 @@ jobs:
- name: Test run
if: ${{ ! cancelled() }}
run: java -jar target/java-stream.jar --arraysize 2048
test-julia:
runs-on: ubuntu-18.04
defaults:
run:
working-directory: ./JuliaStream.jl
steps:
- uses: actions/checkout@v2
- name: Setup project
run: julia --project -e 'import Pkg; Pkg.instantiate()'
- name: Test run PlainStream.jl
if: ${{ ! cancelled() }}
run: julia --project src/PlainStream.jl --arraysize 2048
- name: Test run ThreadedStream.jl
if: ${{ ! cancelled() }}
run: julia --threads 2 --project src/ThreadedStream.jl --arraysize 2048
- name: Test run DistributedStream.jl (no flag)
if: ${{ ! cancelled() }}
run: julia --project src/DistributedStream.jl --arraysize 2048
- name: Test run DistributedStream.jl (-p 2)
if: ${{ ! cancelled() }}
run: julia -p 2 --project src/DistributedStream.jl --arraysize 2048
- name: Test run CUDAStream.jl
if: ${{ ! cancelled() }}
run: julia --project src/CUDAStream.jl --list
- name: Test run AMDGPUStream.jl
if: ${{ ! cancelled() }}
run: julia --project src/AMDGPUStream.jl --list
test:
runs-on: ubuntu-18.04
steps:

14
CITATION.cff Normal file
View File

@ -0,0 +1,14 @@
cff-version: 1.1.0
message: If you use this software, please cite it as below.
authors:
- family-names: Deakin
given-names: Tom
affiliation: University of Bristol
website: https://hpc.tomdeakin.com
- family-names: McIntosh-Smith
given-names: Simon
affiliation: University of Bristol
website: https://uob-hpc.github.io
title: BabelStream
version: 3.4
date-released: 2019-04-10

View File

@ -0,0 +1,2 @@
indent = 2
margin = 100

5
JuliaStream.jl/.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
*.jl.cov
*.jl.*.cov
*.jl.mem
/docs/build/
/docs/Manifest.toml

View File

@ -0,0 +1,415 @@
# This file is machine-generated - editing it directly is not advised
[[AMDGPU]]
deps = ["AbstractFFTs", "Adapt", "BinaryProvider", "CEnum", "GPUArrays", "GPUCompiler", "HIP_jll", "LLVM", "Libdl", "LinearAlgebra", "MacroTools", "Pkg", "Printf", "ROCmDeviceLibs_jll", "Random", "Requires", "Setfield", "hsa_rocr_jll"]
git-tree-sha1 = "d64c97447a753cfbf0158d6c7be513f34526d559"
uuid = "21141c5a-9bdb-4563-92ae-f87d6854732e"
version = "0.2.12"
[[AbstractFFTs]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0"
uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
version = "1.0.1"
[[Adapt]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "84918055d15b3114ede17ac6a7182f68870c16f7"
uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
version = "3.3.1"
[[ArgParse]]
deps = ["Logging", "TextWrap"]
git-tree-sha1 = "3102bce13da501c9104df33549f511cd25264d7d"
uuid = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
version = "1.1.4"
[[ArgTools]]
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
[[Artifacts]]
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
[[Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
[[BinaryProvider]]
deps = ["Libdl", "Logging", "SHA"]
git-tree-sha1 = "ecdec412a9abc8db54c0efc5548c64dfce072058"
uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
version = "0.5.10"
[[Bzip2_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "19a35467a82e236ff51bc17a3a44b69ef35185a2"
uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0"
version = "1.0.8+0"
[[CEnum]]
git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9"
uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
version = "0.4.1"
[[ConstructionBase]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "f74e9d5388b8620b4cee35d4c5a618dd4dc547f4"
uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
version = "1.3.0"
[[Dates]]
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
[[Downloads]]
deps = ["ArgTools", "LibCURL", "NetworkOptions"]
uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
[[Elfutils_jll]]
deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "XZ_jll", "Zlib_jll", "argp_standalone_jll", "fts_jll", "obstack_jll"]
git-tree-sha1 = "8f9fcde6d89b0a3ca51cb2028beab462705c5436"
uuid = "ab5a07f8-06af-567f-a878-e8bb879eba5a"
version = "0.182.0+0"
[[ExprTools]]
git-tree-sha1 = "b7e3d17636b348f005f11040025ae8c6f645fe92"
uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
version = "0.1.6"
[[Future]]
deps = ["Random"]
uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
[[GPUArrays]]
deps = ["AbstractFFTs", "Adapt", "LinearAlgebra", "Printf", "Random", "Serialization", "Statistics"]
git-tree-sha1 = "ececbf05f8904c92814bdbd0aafd5540b0bf2e9a"
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
version = "7.0.1"
[[GPUCompiler]]
deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"]
git-tree-sha1 = "4ed2616d5e656c8716736b64da86755467f26cf5"
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
version = "0.12.9"
[[HIP_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "ROCmCompilerSupport_jll", "ROCmDeviceLibs_jll", "ROCmOpenCLRuntime_jll", "hsa_rocr_jll"]
git-tree-sha1 = "5097d8f7b6842156ab0928371b3d03fefd8decab"
uuid = "2696aab5-0948-5276-aa9a-2a86a37016b8"
version = "4.0.0+1"
[[InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
[[JLLWrappers]]
deps = ["Preferences"]
git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
version = "1.3.0"
[[LLVM]]
deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"]
git-tree-sha1 = "23a47d417a3cd9c2e73c854bac7dd4731c105ef7"
uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
version = "4.4.0"
[[LLVMExtra_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "9c360e5ce980b88bb31a7b086dbb19469008154b"
uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab"
version = "0.0.10+0"
[[LibCURL]]
deps = ["LibCURL_jll", "MozillaCACerts_jll"]
uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
[[LibCURL_jll]]
deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
[[LibGit2]]
deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
[[LibSSH2_jll]]
deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
[[Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
[[Libgcrypt_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgpg_error_jll", "Pkg"]
git-tree-sha1 = "64613c82a59c120435c067c2b809fc61cf5166ae"
uuid = "d4300ac3-e22c-5743-9152-c294e39db1e4"
version = "1.8.7+0"
[[Libglvnd_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll"]
git-tree-sha1 = "7739f837d6447403596a75d19ed01fd08d6f56bf"
uuid = "7e76a0d4-f3c7-5321-8279-8d96eeed0f29"
version = "1.3.0+3"
[[Libgpg_error_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "c333716e46366857753e273ce6a69ee0945a6db9"
uuid = "7add5ba3-2f88-524e-9cd5-f83b8a55f7b8"
version = "1.42.0+0"
[[Libiconv_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "42b62845d70a619f063a7da093d995ec8e15e778"
uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531"
version = "1.16.1+1"
[[LinearAlgebra]]
deps = ["Libdl"]
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
[[Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
[[MacroTools]]
deps = ["Markdown", "Random"]
git-tree-sha1 = "0fb723cd8c45858c22169b2e42269e53271a6df7"
uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
version = "0.5.7"
[[Markdown]]
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
[[MbedTLS_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
[[MozillaCACerts_jll]]
uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
[[NUMA_jll]]
deps = ["Libdl", "Pkg"]
git-tree-sha1 = "778f9bd14400cff2c32ed357e12766ac0e3d766e"
uuid = "7f51dc2b-bb24-59f8-b771-bb1490e4195d"
version = "2.0.13+1"
[[NetworkOptions]]
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
[[OrderedCollections]]
git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
version = "1.4.1"
[[Parameters]]
deps = ["OrderedCollections", "UnPack"]
git-tree-sha1 = "2276ac65f1e236e0a6ea70baff3f62ad4c625345"
uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
version = "0.12.2"
[[Pkg]]
deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
[[Preferences]]
deps = ["TOML"]
git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a"
uuid = "21216c6a-2e73-6563-6e65-726566657250"
version = "1.2.2"
[[Printf]]
deps = ["Unicode"]
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
[[REPL]]
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
[[ROCmCompilerSupport_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "ROCmDeviceLibs_jll", "hsa_rocr_jll"]
git-tree-sha1 = "56ddcfb5d8b60c9f8c1bc619886f8d363fd1926d"
uuid = "8fbdd1d2-db62-5cd0-981e-905da1486e17"
version = "4.0.0+1"
[[ROCmDeviceLibs_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"]
git-tree-sha1 = "d764f0f28b5af89aa004871a6a38e5d061f77257"
uuid = "873c0968-716b-5aa7-bb8d-d1e2e2aeff2d"
version = "4.0.0+0"
[[ROCmOpenCLRuntime_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pkg", "ROCmCompilerSupport_jll", "ROCmDeviceLibs_jll", "Xorg_libX11_jll", "Xorg_xorgproto_jll", "hsa_rocr_jll"]
git-tree-sha1 = "f9e3e2cb40a7990535efa7da9b9dd0e0b458a973"
uuid = "10ae2a08-2eea-53f8-8c20-eec175020e9f"
version = "4.0.0+1"
[[Random]]
deps = ["Serialization"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
[[Requires]]
deps = ["UUIDs"]
git-tree-sha1 = "4036a3bd08ac7e968e27c203d45f5fff15020621"
uuid = "ae029012-a4dd-5104-9daa-d747884805df"
version = "1.1.3"
[[SHA]]
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
[[Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
[[Setfield]]
deps = ["ConstructionBase", "Future", "MacroTools", "Requires"]
git-tree-sha1 = "fca29e68c5062722b5b4435594c3d1ba557072a3"
uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46"
version = "0.7.1"
[[Sockets]]
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
[[SparseArrays]]
deps = ["LinearAlgebra", "Random"]
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
[[Statistics]]
deps = ["LinearAlgebra", "SparseArrays"]
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[[TOML]]
deps = ["Dates"]
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
[[Tar]]
deps = ["ArgTools", "SHA"]
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
[[TextWrap]]
git-tree-sha1 = "9250ef9b01b66667380cf3275b3f7488d0e25faf"
uuid = "b718987f-49a8-5099-9789-dcd902bef87d"
version = "1.0.1"
[[TimerOutputs]]
deps = ["ExprTools", "Printf"]
git-tree-sha1 = "209a8326c4f955e2442c07b56029e88bb48299c7"
uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
version = "0.5.12"
[[UUIDs]]
deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
[[UnPack]]
git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b"
uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
version = "1.0.2"
[[Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
[[XML2_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"]
git-tree-sha1 = "1acf5bdf07aa0907e0a37d3718bb88d4b687b74a"
uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a"
version = "2.9.12+0"
[[XSLT_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgcrypt_jll", "Libgpg_error_jll", "Libiconv_jll", "Pkg", "XML2_jll", "Zlib_jll"]
git-tree-sha1 = "91844873c4085240b95e795f692c4cec4d805f8a"
uuid = "aed1982a-8fda-507f-9586-7b0439959a61"
version = "1.1.34+0"
[[XZ_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "a921669cd9a45c23031fd4eb904f5cc3d20de415"
uuid = "ffd25f8a-64ca-5728-b0f7-c24cf3aae800"
version = "5.2.5+2"
[[Xorg_libX11_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll", "Xorg_xtrans_jll"]
git-tree-sha1 = "5be649d550f3f4b95308bf0183b82e2582876527"
uuid = "4f6342f7-b3d2-589e-9d20-edeb45f2b2bc"
version = "1.6.9+4"
[[Xorg_libXau_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "4e490d5c960c314f33885790ed410ff3a94ce67e"
uuid = "0c0b7dd1-d40b-584c-a123-a41640f87eec"
version = "1.0.9+4"
[[Xorg_libXdmcp_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "4fe47bd2247248125c428978740e18a681372dd4"
uuid = "a3789734-cfe1-5b06-b2d0-1dd0d9d62d05"
version = "1.1.3+4"
[[Xorg_libXext_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"]
git-tree-sha1 = "b7c0aa8c376b31e4852b360222848637f481f8c3"
uuid = "1082639a-0dae-5f34-9b06-72781eeb8cb3"
version = "1.3.4+4"
[[Xorg_libpthread_stubs_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "6783737e45d3c59a4a4c4091f5f88cdcf0908cbb"
uuid = "14d82f49-176c-5ed1-bb49-ad3f5cbd8c74"
version = "0.1.0+3"
[[Xorg_libxcb_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "XSLT_jll", "Xorg_libXau_jll", "Xorg_libXdmcp_jll", "Xorg_libpthread_stubs_jll"]
git-tree-sha1 = "daf17f441228e7a3833846cd048892861cff16d6"
uuid = "c7cfdc94-dc32-55de-ac96-5a1b8d977c5b"
version = "1.13.0+3"
[[Xorg_xorgproto_jll]]
deps = ["Libdl", "Pkg"]
git-tree-sha1 = "9a9eb8ce756fe0bca01b4be16da770e18d264972"
uuid = "c4d99508-4286-5418-9131-c86396af500b"
version = "2019.2.0+2"
[[Xorg_xtrans_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "79c31e7844f6ecf779705fbc12146eb190b7d845"
uuid = "c5fb5394-a638-5e4d-96e5-b29de1b5cf10"
version = "1.4.0+3"
[[Zlib_jll]]
deps = ["Libdl"]
uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
[[argp_standalone_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "feaf9f6293003c2bf53056fd6930d677ed340b34"
uuid = "c53206cc-00f7-50bf-ad1e-3ae1f6e49bc3"
version = "1.3.1+0"
[[fts_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "78732b942383d2cb521df8a1a0814911144e663d"
uuid = "d65627f6-89bd-53e8-8ab5-8b75ff535eee"
version = "1.2.7+1"
[[hsa_rocr_jll]]
deps = ["Artifacts", "Elfutils_jll", "JLLWrappers", "Libdl", "NUMA_jll", "Pkg", "Zlib_jll", "hsakmt_roct_jll"]
git-tree-sha1 = "df8d73efec8b1e53ad527d208f5343c0368f0fcd"
uuid = "dd59ff1a-a01a-568d-8b29-0669330f116a"
version = "4.0.0+0"
[[hsakmt_roct_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "NUMA_jll", "Pkg"]
git-tree-sha1 = "80e0c9940e15cfd6f1f1e9d9f3953ec4d48d3d4a"
uuid = "1cecccd7-a9b6-5045-9cdc-a44c19b16d76"
version = "4.0.0+0"
[[nghttp2_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
[[obstack_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "1c4a6b66e934fc6db4649cb2910c72f53bbfea7e"
uuid = "c88a4935-d25e-5644-aacc-5db6f1b8ef79"
version = "1.2.2+0"
[[p7zip_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"

View File

@ -0,0 +1,7 @@
[deps]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
[compat]
julia = "1.6"

View File

@ -0,0 +1,316 @@
# This file is machine-generated - editing it directly is not advised
[[AbstractFFTs]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0"
uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
version = "1.0.1"
[[Adapt]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "84918055d15b3114ede17ac6a7182f68870c16f7"
uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
version = "3.3.1"
[[ArgParse]]
deps = ["Logging", "TextWrap"]
git-tree-sha1 = "3102bce13da501c9104df33549f511cd25264d7d"
uuid = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
version = "1.1.4"
[[ArgTools]]
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
[[Artifacts]]
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
[[BFloat16s]]
deps = ["LinearAlgebra", "Test"]
git-tree-sha1 = "4af69e205efc343068dc8722b8dfec1ade89254a"
uuid = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
version = "0.1.0"
[[Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
[[CEnum]]
git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9"
uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
version = "0.4.1"
[[CUDA]]
deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CompilerSupportLibraries_jll", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "SpecialFunctions", "TimerOutputs"]
git-tree-sha1 = "c583f3ccdce071b8a8bce9bf3d5d5409eaf36d2b"
uuid = "052768ef-5323-5732-b1bb-66c8b64840ba"
version = "3.4.1"
[[ChainRulesCore]]
deps = ["Compat", "LinearAlgebra", "SparseArrays"]
git-tree-sha1 = "bdc0937269321858ab2a4f288486cb258b9a0af7"
uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
version = "1.3.0"
[[Compat]]
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
git-tree-sha1 = "727e463cfebd0c7b999bbf3e9e7e16f254b94193"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "3.34.0"
[[CompilerSupportLibraries_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
[[Dates]]
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
[[DelimitedFiles]]
deps = ["Mmap"]
uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
[[Distributed]]
deps = ["Random", "Serialization", "Sockets"]
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
[[DocStringExtensions]]
deps = ["LibGit2"]
git-tree-sha1 = "a32185f5428d3986f47c2ab78b1f216d5e6cc96f"
uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
version = "0.8.5"
[[Downloads]]
deps = ["ArgTools", "LibCURL", "NetworkOptions"]
uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
[[ExprTools]]
git-tree-sha1 = "b7e3d17636b348f005f11040025ae8c6f645fe92"
uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
version = "0.1.6"
[[GPUArrays]]
deps = ["Adapt", "LinearAlgebra", "Printf", "Random", "Serialization", "Statistics"]
git-tree-sha1 = "8fac1cf7d6ce0f2249c7acaf25d22e1e85c4a07f"
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
version = "8.0.2"
[[GPUCompiler]]
deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"]
git-tree-sha1 = "4ed2616d5e656c8716736b64da86755467f26cf5"
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
version = "0.12.9"
[[InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
[[IrrationalConstants]]
git-tree-sha1 = "f76424439413893a832026ca355fe273e93bce94"
uuid = "92d709cd-6900-40b7-9082-c6be49f344b6"
version = "0.1.0"
[[JLLWrappers]]
deps = ["Preferences"]
git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
version = "1.3.0"
[[LLVM]]
deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"]
git-tree-sha1 = "23a47d417a3cd9c2e73c854bac7dd4731c105ef7"
uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
version = "4.4.0"
[[LLVMExtra_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "9c360e5ce980b88bb31a7b086dbb19469008154b"
uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab"
version = "0.0.10+0"
[[LazyArtifacts]]
deps = ["Artifacts", "Pkg"]
uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
[[LibCURL]]
deps = ["LibCURL_jll", "MozillaCACerts_jll"]
uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
[[LibCURL_jll]]
deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
[[LibGit2]]
deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
[[LibSSH2_jll]]
deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
[[Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
[[LinearAlgebra]]
deps = ["Libdl"]
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
[[LogExpFunctions]]
deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"]
git-tree-sha1 = "3d682c07e6dd250ed082f883dc88aee7996bf2cc"
uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
version = "0.3.0"
[[Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
[[Markdown]]
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
[[MbedTLS_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
[[Mmap]]
uuid = "a63ad114-7e13-5084-954f-fe012c677804"
[[MozillaCACerts_jll]]
uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
[[NetworkOptions]]
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
[[OpenSpecFun_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1"
uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
version = "0.5.5+0"
[[OrderedCollections]]
git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
version = "1.4.1"
[[Parameters]]
deps = ["OrderedCollections", "UnPack"]
git-tree-sha1 = "2276ac65f1e236e0a6ea70baff3f62ad4c625345"
uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
version = "0.12.2"
[[Pkg]]
deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
[[Preferences]]
deps = ["TOML"]
git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a"
uuid = "21216c6a-2e73-6563-6e65-726566657250"
version = "1.2.2"
[[Printf]]
deps = ["Unicode"]
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
[[REPL]]
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
[[Random]]
deps = ["Serialization"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
[[Random123]]
deps = ["Libdl", "Random", "RandomNumbers"]
git-tree-sha1 = "0e8b146557ad1c6deb1367655e052276690e71a3"
uuid = "74087812-796a-5b5d-8853-05524746bad3"
version = "1.4.2"
[[RandomNumbers]]
deps = ["Random", "Requires"]
git-tree-sha1 = "043da614cc7e95c703498a491e2c21f58a2b8111"
uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143"
version = "1.5.3"
[[Reexport]]
git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b"
uuid = "189a3867-3050-52da-a836-e630ba90ab69"
version = "1.2.2"
[[Requires]]
deps = ["UUIDs"]
git-tree-sha1 = "4036a3bd08ac7e968e27c203d45f5fff15020621"
uuid = "ae029012-a4dd-5104-9daa-d747884805df"
version = "1.1.3"
[[SHA]]
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
[[Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
[[SharedArrays]]
deps = ["Distributed", "Mmap", "Random", "Serialization"]
uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
[[Sockets]]
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
[[SparseArrays]]
deps = ["LinearAlgebra", "Random"]
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
[[SpecialFunctions]]
deps = ["ChainRulesCore", "LogExpFunctions", "OpenSpecFun_jll"]
git-tree-sha1 = "a322a9493e49c5f3a10b50df3aedaf1cdb3244b7"
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
version = "1.6.1"
[[Statistics]]
deps = ["LinearAlgebra", "SparseArrays"]
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[[TOML]]
deps = ["Dates"]
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
[[Tar]]
deps = ["ArgTools", "SHA"]
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
[[Test]]
deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[[TextWrap]]
git-tree-sha1 = "9250ef9b01b66667380cf3275b3f7488d0e25faf"
uuid = "b718987f-49a8-5099-9789-dcd902bef87d"
version = "1.0.1"
[[TimerOutputs]]
deps = ["ExprTools", "Printf"]
git-tree-sha1 = "209a8326c4f955e2442c07b56029e88bb48299c7"
uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
version = "0.5.12"
[[UUIDs]]
deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
[[UnPack]]
git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b"
uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
version = "1.0.2"
[[Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
[[Zlib_jll]]
deps = ["Libdl"]
uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
[[nghttp2_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
[[p7zip_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"

View File

@ -0,0 +1,7 @@
[deps]
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
[compat]
julia = "1.6"

View File

@ -0,0 +1,547 @@
# This file is machine-generated - editing it directly is not advised
[[AMDGPU]]
deps = ["AbstractFFTs", "Adapt", "BinaryProvider", "CEnum", "GPUArrays", "GPUCompiler", "HIP_jll", "LLVM", "Libdl", "LinearAlgebra", "MacroTools", "Pkg", "Printf", "ROCmDeviceLibs_jll", "Random", "Requires", "Setfield", "hsa_rocr_jll"]
git-tree-sha1 = "d64c97447a753cfbf0158d6c7be513f34526d559"
uuid = "21141c5a-9bdb-4563-92ae-f87d6854732e"
version = "0.2.12"
[[AbstractFFTs]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0"
uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
version = "1.0.1"
[[Adapt]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "84918055d15b3114ede17ac6a7182f68870c16f7"
uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
version = "3.3.1"
[[ArgParse]]
deps = ["Logging", "TextWrap"]
git-tree-sha1 = "3102bce13da501c9104df33549f511cd25264d7d"
uuid = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
version = "1.1.4"
[[ArgTools]]
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
[[Artifacts]]
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
[[BFloat16s]]
deps = ["LinearAlgebra", "Test"]
git-tree-sha1 = "4af69e205efc343068dc8722b8dfec1ade89254a"
uuid = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
version = "0.1.0"
[[Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
[[BinaryProvider]]
deps = ["Libdl", "Logging", "SHA"]
git-tree-sha1 = "ecdec412a9abc8db54c0efc5548c64dfce072058"
uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
version = "0.5.10"
[[Bzip2_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "19a35467a82e236ff51bc17a3a44b69ef35185a2"
uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0"
version = "1.0.8+0"
[[CEnum]]
git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9"
uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
version = "0.4.1"
[[CUDA]]
deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CompilerSupportLibraries_jll", "DataStructures", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "SpecialFunctions", "TimerOutputs"]
git-tree-sha1 = "5e696e37e51b01ae07bd9f700afe6cbd55250bce"
uuid = "052768ef-5323-5732-b1bb-66c8b64840ba"
version = "3.3.4"
[[CUDAKernels]]
deps = ["Adapt", "CUDA", "Cassette", "KernelAbstractions", "SpecialFunctions", "StaticArrays"]
git-tree-sha1 = "81f76297b63c67723b1d60f5e7e002ae3393974b"
uuid = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
version = "0.3.0"
[[Cassette]]
git-tree-sha1 = "b4b1d61ebbae2bc69a45e3a6b8439b4e411bc131"
uuid = "7057c7e9-c182-5462-911a-8362d720325c"
version = "0.3.8"
[[ChainRulesCore]]
deps = ["Compat", "LinearAlgebra", "SparseArrays"]
git-tree-sha1 = "bdc0937269321858ab2a4f288486cb258b9a0af7"
uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
version = "1.3.0"
[[Compat]]
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
git-tree-sha1 = "727e463cfebd0c7b999bbf3e9e7e16f254b94193"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "3.34.0"
[[CompilerSupportLibraries_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
[[ConstructionBase]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "f74e9d5388b8620b4cee35d4c5a618dd4dc547f4"
uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
version = "1.3.0"
[[DataStructures]]
deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
git-tree-sha1 = "7d9d316f04214f7efdbb6398d545446e246eff02"
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.18.10"
[[Dates]]
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
[[DelimitedFiles]]
deps = ["Mmap"]
uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
[[Distributed]]
deps = ["Random", "Serialization", "Sockets"]
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
[[DocStringExtensions]]
deps = ["LibGit2"]
git-tree-sha1 = "a32185f5428d3986f47c2ab78b1f216d5e6cc96f"
uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
version = "0.8.5"
[[Downloads]]
deps = ["ArgTools", "LibCURL", "NetworkOptions"]
uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
[[Elfutils_jll]]
deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "XZ_jll", "Zlib_jll", "argp_standalone_jll", "fts_jll", "obstack_jll"]
git-tree-sha1 = "8f9fcde6d89b0a3ca51cb2028beab462705c5436"
uuid = "ab5a07f8-06af-567f-a878-e8bb879eba5a"
version = "0.182.0+0"
[[ExprTools]]
git-tree-sha1 = "b7e3d17636b348f005f11040025ae8c6f645fe92"
uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
version = "0.1.6"
[[Future]]
deps = ["Random"]
uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
[[GPUArrays]]
deps = ["AbstractFFTs", "Adapt", "LinearAlgebra", "Printf", "Random", "Serialization", "Statistics"]
git-tree-sha1 = "ececbf05f8904c92814bdbd0aafd5540b0bf2e9a"
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
version = "7.0.1"
[[GPUCompiler]]
deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"]
git-tree-sha1 = "4ed2616d5e656c8716736b64da86755467f26cf5"
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
version = "0.12.9"
[[HIP_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "ROCmCompilerSupport_jll", "ROCmDeviceLibs_jll", "ROCmOpenCLRuntime_jll", "hsa_rocr_jll"]
git-tree-sha1 = "5097d8f7b6842156ab0928371b3d03fefd8decab"
uuid = "2696aab5-0948-5276-aa9a-2a86a37016b8"
version = "4.0.0+1"
[[InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
[[IrrationalConstants]]
git-tree-sha1 = "f76424439413893a832026ca355fe273e93bce94"
uuid = "92d709cd-6900-40b7-9082-c6be49f344b6"
version = "0.1.0"
[[JLLWrappers]]
deps = ["Preferences"]
git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
version = "1.3.0"
[[KernelAbstractions]]
deps = ["Adapt", "Cassette", "InteractiveUtils", "MacroTools", "SpecialFunctions", "StaticArrays", "UUIDs"]
git-tree-sha1 = "5e6c70389c1b1e40adb81664ca8cea6ce8127afc"
uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
version = "0.7.0"
[[LLVM]]
deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"]
git-tree-sha1 = "23a47d417a3cd9c2e73c854bac7dd4731c105ef7"
uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
version = "4.4.0"
[[LLVMExtra_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "9c360e5ce980b88bb31a7b086dbb19469008154b"
uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab"
version = "0.0.10+0"
[[LazyArtifacts]]
deps = ["Artifacts", "Pkg"]
uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
[[LibCURL]]
deps = ["LibCURL_jll", "MozillaCACerts_jll"]
uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
[[LibCURL_jll]]
deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
[[LibGit2]]
deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
[[LibSSH2_jll]]
deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
[[Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
[[Libgcrypt_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgpg_error_jll", "Pkg"]
git-tree-sha1 = "64613c82a59c120435c067c2b809fc61cf5166ae"
uuid = "d4300ac3-e22c-5743-9152-c294e39db1e4"
version = "1.8.7+0"
[[Libglvnd_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll"]
git-tree-sha1 = "7739f837d6447403596a75d19ed01fd08d6f56bf"
uuid = "7e76a0d4-f3c7-5321-8279-8d96eeed0f29"
version = "1.3.0+3"
[[Libgpg_error_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "c333716e46366857753e273ce6a69ee0945a6db9"
uuid = "7add5ba3-2f88-524e-9cd5-f83b8a55f7b8"
version = "1.42.0+0"
[[Libiconv_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "42b62845d70a619f063a7da093d995ec8e15e778"
uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531"
version = "1.16.1+1"
[[LinearAlgebra]]
deps = ["Libdl"]
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
[[LogExpFunctions]]
deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"]
git-tree-sha1 = "3d682c07e6dd250ed082f883dc88aee7996bf2cc"
uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
version = "0.3.0"
[[Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
[[MacroTools]]
deps = ["Markdown", "Random"]
git-tree-sha1 = "0fb723cd8c45858c22169b2e42269e53271a6df7"
uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
version = "0.5.7"
[[Markdown]]
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
[[MbedTLS_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
[[Mmap]]
uuid = "a63ad114-7e13-5084-954f-fe012c677804"
[[MozillaCACerts_jll]]
uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
[[NUMA_jll]]
deps = ["Libdl", "Pkg"]
git-tree-sha1 = "778f9bd14400cff2c32ed357e12766ac0e3d766e"
uuid = "7f51dc2b-bb24-59f8-b771-bb1490e4195d"
version = "2.0.13+1"
[[NetworkOptions]]
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
[[OpenSpecFun_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1"
uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
version = "0.5.5+0"
[[OrderedCollections]]
git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
version = "1.4.1"
[[Parameters]]
deps = ["OrderedCollections", "UnPack"]
git-tree-sha1 = "2276ac65f1e236e0a6ea70baff3f62ad4c625345"
uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
version = "0.12.2"
[[Pkg]]
deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
[[Preferences]]
deps = ["TOML"]
git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a"
uuid = "21216c6a-2e73-6563-6e65-726566657250"
version = "1.2.2"
[[Printf]]
deps = ["Unicode"]
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
[[REPL]]
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
[[ROCKernels]]
deps = ["AMDGPU", "Adapt", "Cassette", "KernelAbstractions", "SpecialFunctions", "StaticArrays"]
git-tree-sha1 = "41105b861342637dde17797bdd9aaa537aca646b"
uuid = "7eb9e9f0-4bd3-4c4c-8bef-26bd9629d9b9"
version = "0.2.0"
[[ROCmCompilerSupport_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "ROCmDeviceLibs_jll", "hsa_rocr_jll"]
git-tree-sha1 = "56ddcfb5d8b60c9f8c1bc619886f8d363fd1926d"
uuid = "8fbdd1d2-db62-5cd0-981e-905da1486e17"
version = "4.0.0+1"
[[ROCmDeviceLibs_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"]
git-tree-sha1 = "d764f0f28b5af89aa004871a6a38e5d061f77257"
uuid = "873c0968-716b-5aa7-bb8d-d1e2e2aeff2d"
version = "4.0.0+0"
[[ROCmOpenCLRuntime_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pkg", "ROCmCompilerSupport_jll", "ROCmDeviceLibs_jll", "Xorg_libX11_jll", "Xorg_xorgproto_jll", "hsa_rocr_jll"]
git-tree-sha1 = "f9e3e2cb40a7990535efa7da9b9dd0e0b458a973"
uuid = "10ae2a08-2eea-53f8-8c20-eec175020e9f"
version = "4.0.0+1"
[[Random]]
deps = ["Serialization"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
[[Random123]]
deps = ["Libdl", "Random", "RandomNumbers"]
git-tree-sha1 = "0e8b146557ad1c6deb1367655e052276690e71a3"
uuid = "74087812-796a-5b5d-8853-05524746bad3"
version = "1.4.2"
[[RandomNumbers]]
deps = ["Random", "Requires"]
git-tree-sha1 = "043da614cc7e95c703498a491e2c21f58a2b8111"
uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143"
version = "1.5.3"
[[Reexport]]
git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b"
uuid = "189a3867-3050-52da-a836-e630ba90ab69"
version = "1.2.2"
[[Requires]]
deps = ["UUIDs"]
git-tree-sha1 = "4036a3bd08ac7e968e27c203d45f5fff15020621"
uuid = "ae029012-a4dd-5104-9daa-d747884805df"
version = "1.1.3"
[[SHA]]
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
[[Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
[[Setfield]]
deps = ["ConstructionBase", "Future", "MacroTools", "Requires"]
git-tree-sha1 = "fca29e68c5062722b5b4435594c3d1ba557072a3"
uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46"
version = "0.7.1"
[[SharedArrays]]
deps = ["Distributed", "Mmap", "Random", "Serialization"]
uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
[[Sockets]]
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
[[SparseArrays]]
deps = ["LinearAlgebra", "Random"]
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
[[SpecialFunctions]]
deps = ["ChainRulesCore", "LogExpFunctions", "OpenSpecFun_jll"]
git-tree-sha1 = "a322a9493e49c5f3a10b50df3aedaf1cdb3244b7"
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
version = "1.6.1"
[[StaticArrays]]
deps = ["LinearAlgebra", "Random", "Statistics"]
git-tree-sha1 = "3240808c6d463ac46f1c1cd7638375cd22abbccb"
uuid = "90137ffa-7385-5640-81b9-e52037218182"
version = "1.2.12"
[[Statistics]]
deps = ["LinearAlgebra", "SparseArrays"]
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[[TOML]]
deps = ["Dates"]
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
[[Tar]]
deps = ["ArgTools", "SHA"]
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
[[Test]]
deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[[TextWrap]]
git-tree-sha1 = "9250ef9b01b66667380cf3275b3f7488d0e25faf"
uuid = "b718987f-49a8-5099-9789-dcd902bef87d"
version = "1.0.1"
[[TimerOutputs]]
deps = ["ExprTools", "Printf"]
git-tree-sha1 = "209a8326c4f955e2442c07b56029e88bb48299c7"
uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
version = "0.5.12"
[[UUIDs]]
deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
[[UnPack]]
git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b"
uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
version = "1.0.2"
[[Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
[[XML2_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"]
git-tree-sha1 = "1acf5bdf07aa0907e0a37d3718bb88d4b687b74a"
uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a"
version = "2.9.12+0"
[[XSLT_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgcrypt_jll", "Libgpg_error_jll", "Libiconv_jll", "Pkg", "XML2_jll", "Zlib_jll"]
git-tree-sha1 = "91844873c4085240b95e795f692c4cec4d805f8a"
uuid = "aed1982a-8fda-507f-9586-7b0439959a61"
version = "1.1.34+0"
[[XZ_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "a921669cd9a45c23031fd4eb904f5cc3d20de415"
uuid = "ffd25f8a-64ca-5728-b0f7-c24cf3aae800"
version = "5.2.5+2"
[[Xorg_libX11_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll", "Xorg_xtrans_jll"]
git-tree-sha1 = "5be649d550f3f4b95308bf0183b82e2582876527"
uuid = "4f6342f7-b3d2-589e-9d20-edeb45f2b2bc"
version = "1.6.9+4"
[[Xorg_libXau_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "4e490d5c960c314f33885790ed410ff3a94ce67e"
uuid = "0c0b7dd1-d40b-584c-a123-a41640f87eec"
version = "1.0.9+4"
[[Xorg_libXdmcp_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "4fe47bd2247248125c428978740e18a681372dd4"
uuid = "a3789734-cfe1-5b06-b2d0-1dd0d9d62d05"
version = "1.1.3+4"
[[Xorg_libXext_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"]
git-tree-sha1 = "b7c0aa8c376b31e4852b360222848637f481f8c3"
uuid = "1082639a-0dae-5f34-9b06-72781eeb8cb3"
version = "1.3.4+4"
[[Xorg_libpthread_stubs_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "6783737e45d3c59a4a4c4091f5f88cdcf0908cbb"
uuid = "14d82f49-176c-5ed1-bb49-ad3f5cbd8c74"
version = "0.1.0+3"
[[Xorg_libxcb_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "XSLT_jll", "Xorg_libXau_jll", "Xorg_libXdmcp_jll", "Xorg_libpthread_stubs_jll"]
git-tree-sha1 = "daf17f441228e7a3833846cd048892861cff16d6"
uuid = "c7cfdc94-dc32-55de-ac96-5a1b8d977c5b"
version = "1.13.0+3"
[[Xorg_xorgproto_jll]]
deps = ["Libdl", "Pkg"]
git-tree-sha1 = "9a9eb8ce756fe0bca01b4be16da770e18d264972"
uuid = "c4d99508-4286-5418-9131-c86396af500b"
version = "2019.2.0+2"
[[Xorg_xtrans_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "79c31e7844f6ecf779705fbc12146eb190b7d845"
uuid = "c5fb5394-a638-5e4d-96e5-b29de1b5cf10"
version = "1.4.0+3"
[[Zlib_jll]]
deps = ["Libdl"]
uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
[[argp_standalone_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "feaf9f6293003c2bf53056fd6930d677ed340b34"
uuid = "c53206cc-00f7-50bf-ad1e-3ae1f6e49bc3"
version = "1.3.1+0"
[[fts_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "78732b942383d2cb521df8a1a0814911144e663d"
uuid = "d65627f6-89bd-53e8-8ab5-8b75ff535eee"
version = "1.2.7+1"
[[hsa_rocr_jll]]
deps = ["Artifacts", "Elfutils_jll", "JLLWrappers", "Libdl", "NUMA_jll", "Pkg", "Zlib_jll", "hsakmt_roct_jll"]
git-tree-sha1 = "df8d73efec8b1e53ad527d208f5343c0368f0fcd"
uuid = "dd59ff1a-a01a-568d-8b29-0669330f116a"
version = "4.0.0+0"
[[hsakmt_roct_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "NUMA_jll", "Pkg"]
git-tree-sha1 = "80e0c9940e15cfd6f1f1e9d9f3953ec4d48d3d4a"
uuid = "1cecccd7-a9b6-5045-9cdc-a44c19b16d76"
version = "4.0.0+0"
[[nghttp2_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
[[obstack_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "1c4a6b66e934fc6db4649cb2910c72f53bbfea7e"
uuid = "c88a4935-d25e-5644-aacc-5db6f1b8ef79"
version = "1.2.2+0"
[[p7zip_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"

View File

@ -0,0 +1,11 @@
[deps]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
CUDAKernels = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
ROCKernels = "7eb9e9f0-4bd3-4c4c-8bef-26bd9629d9b9"
[compat]
julia = "1.6"

View File

@ -0,0 +1,493 @@
# This file is machine-generated - editing it directly is not advised
[[AMDGPU]]
deps = ["AbstractFFTs", "Adapt", "BinaryProvider", "CEnum", "GPUArrays", "GPUCompiler", "LLVM", "Libdl", "LinearAlgebra", "MacroTools", "Printf", "Random", "Requires", "Setfield", "hsa_rocr_jll", "hsakmt_roct_jll"]
git-tree-sha1 = "04fdb3923ac6f55fa7347dce0f0f6f10e321e2e9"
uuid = "21141c5a-9bdb-4563-92ae-f87d6854732e"
version = "0.2.7"
[[AbstractFFTs]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0"
uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
version = "1.0.1"
[[Adapt]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "84918055d15b3114ede17ac6a7182f68870c16f7"
uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
version = "3.3.1"
[[ArgParse]]
deps = ["Logging", "TextWrap"]
git-tree-sha1 = "3102bce13da501c9104df33549f511cd25264d7d"
uuid = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
version = "1.1.4"
[[ArgTools]]
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
[[Artifacts]]
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
[[BFloat16s]]
deps = ["LinearAlgebra", "Test"]
git-tree-sha1 = "4af69e205efc343068dc8722b8dfec1ade89254a"
uuid = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
version = "0.1.0"
[[Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
[[BinaryProvider]]
deps = ["Libdl", "Logging", "SHA"]
git-tree-sha1 = "ecdec412a9abc8db54c0efc5548c64dfce072058"
uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
version = "0.5.10"
[[Bzip2_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "19a35467a82e236ff51bc17a3a44b69ef35185a2"
uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0"
version = "1.0.8+0"
[[CEnum]]
git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9"
uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
version = "0.4.1"
[[CUDA]]
deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CompilerSupportLibraries_jll", "DataStructures", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "MacroTools", "Memoize", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "SpecialFunctions", "TimerOutputs"]
git-tree-sha1 = "364179416eabc34c9ca32126a6bdb431680c3bad"
uuid = "052768ef-5323-5732-b1bb-66c8b64840ba"
version = "3.2.1"
[[CUDAKernels]]
deps = ["Adapt", "CUDA", "Cassette", "KernelAbstractions", "SpecialFunctions", "StaticArrays"]
git-tree-sha1 = "81f76297b63c67723b1d60f5e7e002ae3393974b"
uuid = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
version = "0.3.0"
[[Cassette]]
git-tree-sha1 = "b4b1d61ebbae2bc69a45e3a6b8439b4e411bc131"
uuid = "7057c7e9-c182-5462-911a-8362d720325c"
version = "0.3.8"
[[ChainRulesCore]]
deps = ["Compat", "LinearAlgebra", "SparseArrays"]
git-tree-sha1 = "bdc0937269321858ab2a4f288486cb258b9a0af7"
uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
version = "1.3.0"
[[Compat]]
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
git-tree-sha1 = "727e463cfebd0c7b999bbf3e9e7e16f254b94193"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "3.34.0"
[[CompilerSupportLibraries_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
[[ConstructionBase]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "f74e9d5388b8620b4cee35d4c5a618dd4dc547f4"
uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
version = "1.3.0"
[[DataStructures]]
deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
git-tree-sha1 = "7d9d316f04214f7efdbb6398d545446e246eff02"
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.18.10"
[[Dates]]
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
[[DelimitedFiles]]
deps = ["Mmap"]
uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
[[Distributed]]
deps = ["Random", "Serialization", "Sockets"]
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
[[DocStringExtensions]]
deps = ["LibGit2"]
git-tree-sha1 = "a32185f5428d3986f47c2ab78b1f216d5e6cc96f"
uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
version = "0.8.5"
[[Downloads]]
deps = ["ArgTools", "LibCURL", "NetworkOptions"]
uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
[[Elfutils_jll]]
deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "XZ_jll", "Zlib_jll", "argp_standalone_jll", "fts_jll", "obstack_jll"]
git-tree-sha1 = "8f9fcde6d89b0a3ca51cb2028beab462705c5436"
uuid = "ab5a07f8-06af-567f-a878-e8bb879eba5a"
version = "0.182.0+0"
[[ExprTools]]
git-tree-sha1 = "b7e3d17636b348f005f11040025ae8c6f645fe92"
uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
version = "0.1.6"
[[Future]]
deps = ["Random"]
uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
[[GPUArrays]]
deps = ["AbstractFFTs", "Adapt", "LinearAlgebra", "Printf", "Random", "Serialization", "Statistics"]
git-tree-sha1 = "df5b8569904c5c10e84c640984cfff054b18c086"
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
version = "6.4.1"
[[GPUCompiler]]
deps = ["DataStructures", "ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "Scratch", "Serialization", "TimerOutputs", "UUIDs"]
git-tree-sha1 = "42d635f6d87af125b86288df3819f805fb4d851a"
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
version = "0.11.5"
[[InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
[[IrrationalConstants]]
git-tree-sha1 = "f76424439413893a832026ca355fe273e93bce94"
uuid = "92d709cd-6900-40b7-9082-c6be49f344b6"
version = "0.1.0"
[[JLLWrappers]]
deps = ["Preferences"]
git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
version = "1.3.0"
[[KernelAbstractions]]
deps = ["Adapt", "Cassette", "InteractiveUtils", "MacroTools", "SpecialFunctions", "StaticArrays", "UUIDs"]
git-tree-sha1 = "5e6c70389c1b1e40adb81664ca8cea6ce8127afc"
uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
version = "0.7.0"
[[LLVM]]
deps = ["CEnum", "Libdl", "Printf", "Unicode"]
git-tree-sha1 = "f57ac3fd2045b50d3db081663837ac5b4096947e"
uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
version = "3.9.0"
[[LazyArtifacts]]
deps = ["Artifacts", "Pkg"]
uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
[[LibCURL]]
deps = ["LibCURL_jll", "MozillaCACerts_jll"]
uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
[[LibCURL_jll]]
deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
[[LibGit2]]
deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
[[LibSSH2_jll]]
deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
[[Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
[[LinearAlgebra]]
deps = ["Libdl"]
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
[[LogExpFunctions]]
deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"]
git-tree-sha1 = "3d682c07e6dd250ed082f883dc88aee7996bf2cc"
uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
version = "0.3.0"
[[Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
[[MacroTools]]
deps = ["Markdown", "Random"]
git-tree-sha1 = "0fb723cd8c45858c22169b2e42269e53271a6df7"
uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
version = "0.5.7"
[[Markdown]]
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
[[MbedTLS_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
[[Memoize]]
deps = ["MacroTools"]
git-tree-sha1 = "2b1dfcba103de714d31c033b5dacc2e4a12c7caa"
uuid = "c03570c3-d221-55d1-a50c-7939bbd78826"
version = "0.4.4"
[[Mmap]]
uuid = "a63ad114-7e13-5084-954f-fe012c677804"
[[MozillaCACerts_jll]]
uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
[[NEO_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "gmmlib_jll", "libigc_jll", "oneAPI_Level_Zero_Headers_jll"]
git-tree-sha1 = "c753dd029eb0837658bf8eaee041c19e4ce5bb8c"
uuid = "700fe977-ac61-5f37-bbc8-c6c4b2b6a9fd"
version = "21.12.19358+0"
[[NUMA_jll]]
deps = ["Libdl", "Pkg"]
git-tree-sha1 = "778f9bd14400cff2c32ed357e12766ac0e3d766e"
uuid = "7f51dc2b-bb24-59f8-b771-bb1490e4195d"
version = "2.0.13+1"
[[NetworkOptions]]
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
[[OpenSpecFun_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1"
uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
version = "0.5.5+0"
[[OrderedCollections]]
git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
version = "1.4.1"
[[Parameters]]
deps = ["OrderedCollections", "UnPack"]
git-tree-sha1 = "2276ac65f1e236e0a6ea70baff3f62ad4c625345"
uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
version = "0.12.2"
[[Pkg]]
deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
[[Preferences]]
deps = ["TOML"]
git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a"
uuid = "21216c6a-2e73-6563-6e65-726566657250"
version = "1.2.2"
[[Printf]]
deps = ["Unicode"]
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
[[REPL]]
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
[[ROCKernels]]
deps = ["AMDGPU", "Adapt", "Cassette", "KernelAbstractions", "SpecialFunctions", "StaticArrays"]
git-tree-sha1 = "41105b861342637dde17797bdd9aaa537aca646b"
uuid = "7eb9e9f0-4bd3-4c4c-8bef-26bd9629d9b9"
version = "0.2.0"
[[Random]]
deps = ["Serialization"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
[[Random123]]
deps = ["Libdl", "Random", "RandomNumbers"]
git-tree-sha1 = "0e8b146557ad1c6deb1367655e052276690e71a3"
uuid = "74087812-796a-5b5d-8853-05524746bad3"
version = "1.4.2"
[[RandomNumbers]]
deps = ["Random", "Requires"]
git-tree-sha1 = "043da614cc7e95c703498a491e2c21f58a2b8111"
uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143"
version = "1.5.3"
[[Reexport]]
git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b"
uuid = "189a3867-3050-52da-a836-e630ba90ab69"
version = "1.2.2"
[[Requires]]
deps = ["UUIDs"]
git-tree-sha1 = "4036a3bd08ac7e968e27c203d45f5fff15020621"
uuid = "ae029012-a4dd-5104-9daa-d747884805df"
version = "1.1.3"
[[SHA]]
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
[[SPIRV_LLVM_Translator_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "8cca87d57f6ddf19373cc9791fddc741406c8fbf"
uuid = "4a5d46fc-d8cf-5151-a261-86b458210efb"
version = "11.0.0+2"
[[SPIRV_Tools_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "c0324b7e07bc4649f755bfe7e00f7c6ed6aa353f"
uuid = "6ac6d60f-d740-5983-97d7-a4482c0689f4"
version = "2021.2.0+0"
[[Scratch]]
deps = ["Dates"]
git-tree-sha1 = "0b4b7f1393cff97c33891da2a0bf69c6ed241fda"
uuid = "6c6a2e73-6563-6170-7368-637461726353"
version = "1.1.0"
[[Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
[[Setfield]]
deps = ["ConstructionBase", "Future", "MacroTools", "Requires"]
git-tree-sha1 = "fca29e68c5062722b5b4435594c3d1ba557072a3"
uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46"
version = "0.7.1"
[[SharedArrays]]
deps = ["Distributed", "Mmap", "Random", "Serialization"]
uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
[[Sockets]]
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
[[SparseArrays]]
deps = ["LinearAlgebra", "Random"]
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
[[SpecialFunctions]]
deps = ["ChainRulesCore", "LogExpFunctions", "OpenSpecFun_jll"]
git-tree-sha1 = "a322a9493e49c5f3a10b50df3aedaf1cdb3244b7"
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
version = "1.6.1"
[[StaticArrays]]
deps = ["LinearAlgebra", "Random", "Statistics"]
git-tree-sha1 = "3240808c6d463ac46f1c1cd7638375cd22abbccb"
uuid = "90137ffa-7385-5640-81b9-e52037218182"
version = "1.2.12"
[[Statistics]]
deps = ["LinearAlgebra", "SparseArrays"]
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[[TOML]]
deps = ["Dates"]
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
[[Tar]]
deps = ["ArgTools", "SHA"]
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
[[Test]]
deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[[TextWrap]]
git-tree-sha1 = "9250ef9b01b66667380cf3275b3f7488d0e25faf"
uuid = "b718987f-49a8-5099-9789-dcd902bef87d"
version = "1.0.1"
[[TimerOutputs]]
deps = ["ExprTools", "Printf"]
git-tree-sha1 = "209a8326c4f955e2442c07b56029e88bb48299c7"
uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
version = "0.5.12"
[[UUIDs]]
deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
[[UnPack]]
git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b"
uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
version = "1.0.2"
[[Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
[[XZ_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "a921669cd9a45c23031fd4eb904f5cc3d20de415"
uuid = "ffd25f8a-64ca-5728-b0f7-c24cf3aae800"
version = "5.2.5+2"
[[Zlib_jll]]
deps = ["Libdl"]
uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
[[argp_standalone_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "feaf9f6293003c2bf53056fd6930d677ed340b34"
uuid = "c53206cc-00f7-50bf-ad1e-3ae1f6e49bc3"
version = "1.3.1+0"
[[fts_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "78732b942383d2cb521df8a1a0814911144e663d"
uuid = "d65627f6-89bd-53e8-8ab5-8b75ff535eee"
version = "1.2.7+1"
[[gmmlib_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "4067ef455d4fa67febe26efc3f9565a9bb7ba911"
uuid = "09858cae-167c-5acb-9302-fddc6874d481"
version = "20.3.2+0"
[[hsa_rocr_jll]]
deps = ["Artifacts", "Elfutils_jll", "JLLWrappers", "Libdl", "NUMA_jll", "Pkg", "Zlib_jll", "hsakmt_roct_jll"]
git-tree-sha1 = "42189f176d6ae4f37c0c0e652fec339bb0bfab5d"
uuid = "dd59ff1a-a01a-568d-8b29-0669330f116a"
version = "3.7.0+1"
[[hsakmt_roct_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "NUMA_jll", "Pkg"]
git-tree-sha1 = "8a9ee6c091e952e4ea6585d15131d43f789ae041"
uuid = "1cecccd7-a9b6-5045-9cdc-a44c19b16d76"
version = "3.8.0+0"
[[libigc_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "6140dbf267f7ab57fb791b49f2114374218b5c20"
uuid = "94295238-5935-5bd7-bb0f-b00942e9bdd5"
version = "1.0.6712+0"
[[nghttp2_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
[[obstack_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "1c4a6b66e934fc6db4649cb2910c72f53bbfea7e"
uuid = "c88a4935-d25e-5644-aacc-5db6f1b8ef79"
version = "1.2.2+0"
[[oneAPI]]
deps = ["Adapt", "CEnum", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "LinearAlgebra", "NEO_jll", "Printf", "Random", "SPIRV_LLVM_Translator_jll", "SPIRV_Tools_jll", "SpecialFunctions", "oneAPI_Level_Zero_Loader_jll"]
git-tree-sha1 = "b4a4b84c864e75fe885a1643525f0c97ce310dd9"
uuid = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
version = "0.1.3"
[[oneAPI_Level_Zero_Headers_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "48982fbfd2f3d0a30d644563dcf96892d252b395"
uuid = "f4bc562b-d309-54f8-9efb-476e56f0410d"
version = "1.1.2+1"
[[oneAPI_Level_Zero_Loader_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "oneAPI_Level_Zero_Headers_jll"]
git-tree-sha1 = "1fa53dfdd32a732f09c254c86403e1abab653fb2"
uuid = "13eca655-d68d-5b81-8367-6d99d727ab01"
version = "1.3.6+0"
[[p7zip_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"

View File

@ -0,0 +1,19 @@
name = "JuliaStream"
uuid = "1bdcc9b7-f5ed-4705-bc7b-be1b748ec681"
authors = ["Wei-Chen Lin <wl14928@bristol.ac.uk>"]
version = "3.4.0"
[deps]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
CUDAKernels = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
ROCKernels = "7eb9e9f0-4bd3-4c4c-8bef-26bd9629d9b9"
oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
[compat]
julia = "1.6"

67
JuliaStream.jl/README.md Normal file
View File

@ -0,0 +1,67 @@
JuliaStream.jl
==============
This is an implementation of BabelStream in Julia which contains the following variants:
* `PlainStream.jl` - Single threaded `for`
* `ThreadedStream.jl` - Threaded implementation with `Threads.@threads` macros
* `DistributedStream.jl` - Process based parallelism with `@distributed` macros
* `CUDAStream.jl` - Direct port of BabelStream's native CUDA implementation using [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl)
* `AMDGPUStream.jl` - Direct port of BabelStream's native HIP implementation using [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl)
* `oneAPIStream.jl` - Direct port of BabelStream's native SYCL implementation using [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl)
* `KernelAbstractionsStream.jl` - Direct port of BabelStream's native CUDA implementation using [KernelAbstractions.jl](https://github.com/JuliaGPU/KernelAbstractions.jl)
### Build & Run
Prerequisites
* Julia >= 1.6
A set of reduced-dependency projects is available for the following backends and implementations:
* `AMDGPU` supports:
- `AMDGPUStream.jl`
* `CUDA` supports:
- `CUDAStream.jl`
* `oneAPI` supports:
- `oneAPIStream.jl`
* `KernelAbstractions` supports:
- `KernelAbstractionsStream.jl`
* `Threaded` supports:
- `PlainStream.jl`
- `ThreadedStream.jl`
- `DistributedStream.jl`
With Julia on path, run your selected benchmark with:
```shell
> cd JuliaStream.jl
> julia --project=<BACKEND> -e 'import Pkg; Pkg.instantiate()' # only required on first run
> julia --project=<BACKEND> src/<IMPL>Stream.jl
```
For example, to run the CUDA implementation:
```shell
> cd JuliaStream.jl
> julia --project=CUDA -e 'import Pkg; Pkg.instantiate()'
> julia --project=CUDA src/CUDAStream.jl
```
**Important:**
* Julia is 1-indexed, so N >= 1 in `--device N`.
* Thread count for `ThreadedStream` must be set via the `JULIA_NUM_THREADS` environment variable (e.g. `export JULIA_NUM_THREADS=$(nproc)`) or the `--threads <N>` command-line flag; otherwise it defaults to 1.
* Worker count for `DistributedStream` is set with `-p <N>` as per the [documentation](https://docs.julialang.org/en/v1/manual/distributed-computing).
* Certain implementations such as CUDA and AMDGPU will do hardware detection at runtime and may download and/or compile further software packages for the platform.
***
Alternatively, the top-level project `Project.toml` contains all dependencies needed to run all implementations in `src`.
There may be instances where some packages are locked to an older version because of transitive dependency requirements.
To run the benchmark using the top-level project, run the benchmark with:
```shell
> cd JuliaStream.jl
> julia --project -e 'import Pkg; Pkg.instantiate()'
> julia --project src/<IMPL>Stream.jl
```

View File

@ -0,0 +1,31 @@
# This file is machine-generated - editing it directly is not advised
[[ArgParse]]
deps = ["Logging", "TextWrap"]
git-tree-sha1 = "3102bce13da501c9104df33549f511cd25264d7d"
uuid = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
version = "1.1.4"
[[Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
[[OrderedCollections]]
git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
version = "1.4.1"
[[Parameters]]
deps = ["OrderedCollections", "UnPack"]
git-tree-sha1 = "2276ac65f1e236e0a6ea70baff3f62ad4c625345"
uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
version = "0.12.2"
[[TextWrap]]
git-tree-sha1 = "9250ef9b01b66667380cf3275b3f7488d0e25faf"
uuid = "b718987f-49a8-5099-9789-dcd902bef87d"
version = "1.0.1"
[[UnPack]]
git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b"
uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
version = "1.0.2"

View File

@ -0,0 +1,6 @@
[deps]
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
[compat]
julia = "1.6"

View File

@ -0,0 +1,319 @@
# This file is machine-generated - editing it directly is not advised
[[Adapt]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "84918055d15b3114ede17ac6a7182f68870c16f7"
uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
version = "3.3.1"
[[ArgParse]]
deps = ["Logging", "TextWrap"]
git-tree-sha1 = "3102bce13da501c9104df33549f511cd25264d7d"
uuid = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
version = "1.1.4"
[[ArgTools]]
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
[[Artifacts]]
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
[[Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
[[CEnum]]
git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9"
uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
version = "0.4.1"
[[ChainRulesCore]]
deps = ["Compat", "LinearAlgebra", "SparseArrays"]
git-tree-sha1 = "bdc0937269321858ab2a4f288486cb258b9a0af7"
uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
version = "1.3.0"
[[Compat]]
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
git-tree-sha1 = "727e463cfebd0c7b999bbf3e9e7e16f254b94193"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "3.34.0"
[[CompilerSupportLibraries_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
[[Dates]]
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
[[DelimitedFiles]]
deps = ["Mmap"]
uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
[[Distributed]]
deps = ["Random", "Serialization", "Sockets"]
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
[[DocStringExtensions]]
deps = ["LibGit2"]
git-tree-sha1 = "a32185f5428d3986f47c2ab78b1f216d5e6cc96f"
uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
version = "0.8.5"
[[Downloads]]
deps = ["ArgTools", "LibCURL", "NetworkOptions"]
uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
[[ExprTools]]
git-tree-sha1 = "b7e3d17636b348f005f11040025ae8c6f645fe92"
uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
version = "0.1.6"
[[GPUArrays]]
deps = ["Adapt", "LinearAlgebra", "Printf", "Random", "Serialization", "Statistics"]
git-tree-sha1 = "8fac1cf7d6ce0f2249c7acaf25d22e1e85c4a07f"
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
version = "8.0.2"
[[GPUCompiler]]
deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"]
git-tree-sha1 = "4ed2616d5e656c8716736b64da86755467f26cf5"
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
version = "0.12.9"
[[InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
[[IrrationalConstants]]
git-tree-sha1 = "f76424439413893a832026ca355fe273e93bce94"
uuid = "92d709cd-6900-40b7-9082-c6be49f344b6"
version = "0.1.0"
[[JLLWrappers]]
deps = ["Preferences"]
git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
version = "1.3.0"
[[LLVM]]
deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"]
git-tree-sha1 = "23a47d417a3cd9c2e73c854bac7dd4731c105ef7"
uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
version = "4.4.0"
[[LLVMExtra_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "9c360e5ce980b88bb31a7b086dbb19469008154b"
uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab"
version = "0.0.10+0"
[[LibCURL]]
deps = ["LibCURL_jll", "MozillaCACerts_jll"]
uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
[[LibCURL_jll]]
deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
[[LibGit2]]
deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
[[LibSSH2_jll]]
deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
[[Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
[[LinearAlgebra]]
deps = ["Libdl"]
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
[[LogExpFunctions]]
deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"]
git-tree-sha1 = "3d682c07e6dd250ed082f883dc88aee7996bf2cc"
uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
version = "0.3.0"
[[Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
[[Markdown]]
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
[[MbedTLS_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
[[Mmap]]
uuid = "a63ad114-7e13-5084-954f-fe012c677804"
[[MozillaCACerts_jll]]
uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
[[NEO_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "gmmlib_jll", "libigc_jll", "oneAPI_Level_Zero_Headers_jll"]
git-tree-sha1 = "2bfc354b5684821dcc88f1e477cefd0dd03c60b5"
uuid = "700fe977-ac61-5f37-bbc8-c6c4b2b6a9fd"
version = "21.31.20514+0"
[[NetworkOptions]]
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
[[OpenSpecFun_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1"
uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
version = "0.5.5+0"
[[OrderedCollections]]
git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
version = "1.4.1"
[[Parameters]]
deps = ["OrderedCollections", "UnPack"]
git-tree-sha1 = "2276ac65f1e236e0a6ea70baff3f62ad4c625345"
uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
version = "0.12.2"
[[Pkg]]
deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
[[Preferences]]
deps = ["TOML"]
git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a"
uuid = "21216c6a-2e73-6563-6e65-726566657250"
version = "1.2.2"
[[Printf]]
deps = ["Unicode"]
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
[[REPL]]
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
[[Random]]
deps = ["Serialization"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
[[SHA]]
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
[[SPIRV_LLVM_Translator_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "8cca87d57f6ddf19373cc9791fddc741406c8fbf"
uuid = "4a5d46fc-d8cf-5151-a261-86b458210efb"
version = "11.0.0+2"
[[SPIRV_Tools_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "c0324b7e07bc4649f755bfe7e00f7c6ed6aa353f"
uuid = "6ac6d60f-d740-5983-97d7-a4482c0689f4"
version = "2021.2.0+0"
[[Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
[[SharedArrays]]
deps = ["Distributed", "Mmap", "Random", "Serialization"]
uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
[[Sockets]]
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
[[SparseArrays]]
deps = ["LinearAlgebra", "Random"]
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
[[SpecialFunctions]]
deps = ["ChainRulesCore", "LogExpFunctions", "OpenSpecFun_jll"]
git-tree-sha1 = "a322a9493e49c5f3a10b50df3aedaf1cdb3244b7"
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
version = "1.6.1"
[[Statistics]]
deps = ["LinearAlgebra", "SparseArrays"]
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[[TOML]]
deps = ["Dates"]
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
[[Tar]]
deps = ["ArgTools", "SHA"]
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
[[Test]]
deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[[TextWrap]]
git-tree-sha1 = "9250ef9b01b66667380cf3275b3f7488d0e25faf"
uuid = "b718987f-49a8-5099-9789-dcd902bef87d"
version = "1.0.1"
[[TimerOutputs]]
deps = ["ExprTools", "Printf"]
git-tree-sha1 = "209a8326c4f955e2442c07b56029e88bb48299c7"
uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
version = "0.5.12"
[[UUIDs]]
deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
[[UnPack]]
git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b"
uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
version = "1.0.2"
[[Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
[[Zlib_jll]]
deps = ["Libdl"]
uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
[[gmmlib_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "0d5e5461d21b14853b4c332045c57d2601c403bd"
uuid = "09858cae-167c-5acb-9302-fddc6874d481"
version = "21.2.1+0"
[[libigc_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "b30a895e7ea52991a3f984ab0302c42858d766c0"
uuid = "94295238-5935-5bd7-bb0f-b00942e9bdd5"
version = "1.0.8173+0"
[[nghttp2_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
[[oneAPI]]
deps = ["Adapt", "CEnum", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "LinearAlgebra", "NEO_jll", "Printf", "Random", "SPIRV_LLVM_Translator_jll", "SPIRV_Tools_jll", "SpecialFunctions", "oneAPI_Level_Zero_Headers_jll", "oneAPI_Level_Zero_Loader_jll"]
git-tree-sha1 = "92e8eefdd4694597994590230ab329545804bdb3"
uuid = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
version = "0.2.0"
[[oneAPI_Level_Zero_Headers_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "e1d123ff9ada6c469a1eaf57e33a74c3cb26a5a4"
uuid = "f4bc562b-d309-54f8-9efb-476e56f0410d"
version = "1.2.13+0"
[[oneAPI_Level_Zero_Loader_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "oneAPI_Level_Zero_Headers_jll"]
git-tree-sha1 = "50124857f7e87420655929a9c8ca86749826af11"
uuid = "13eca655-d68d-5b81-8367-6d99d727ab01"
version = "1.4.1+0"
[[p7zip_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"

View File

@ -0,0 +1,7 @@
[deps]
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
[compat]
julia = "1.6"

View File

@ -0,0 +1,167 @@
# AMDGPU.jl doesn't support CPU agents, so this isn't a feature-complete ROCmStream, only AMD GPUs
include("Stream.jl")
using AMDGPU
# StreamData specialised so that its arrays are AMDGPU ROCArrays.
const ROCData = StreamData{T,ROCArray{T}} where {T}
# Workgroup size (`groupsize`) used for every kernel launch in this file.
const TBSize = 1024::Int
# Length of the partial-sum buffer in `dot`; `dot` launches TBSize * DotBlocks work-items.
const DotBlocks = 256::Int
# Enumerate the GPU agents visible to AMDGPU.jl as (agent, description, "AMDGPU.jl")
# tuples. Any failure while querying the runtime yields an empty list.
function devices()::Vector{DeviceWithRepr}
    try
        # the order in which agents are reported isn't stable, so sort by their repr
        agents = sort(AMDGPU.get_agents(:gpu), by = repr)
        return [(agent, repr(agent), "AMDGPU.jl") for agent in agents]
    catch
        # probably unsupported
        return DeviceWithRepr[]
    end
end
# Select the requested agent and allocate the three uninitialised device arrays.
#
# arraysize - number of elements per array; must be a multiple of TBSize
# scalar    - scalar stored in the returned data
# device    - entry from `devices()`; its first element is the agent to use
# silent    - suppress the informational output when true
#
# Returns (data, nothing); this backend needs no extra context object.
function make_stream(
    arraysize::Int,
    scalar::T,
    device::DeviceWithRepr,
    silent::Bool,
)::Tuple{ROCData{T},Nothing} where {T}
    arraysize % TBSize == 0 ||
        error("arraysize ($(arraysize)) must be divisible by $(TBSize)!")

    # XXX AMDGPU doesn't expose an API for setting the default like CUDA.device!()
    # but AMDGPU.get_default_agent returns DEFAULT_AGENT so we can do it by hand
    AMDGPU.DEFAULT_AGENT[] = device[1]
    selected = AMDGPU.get_default_agent()

    if !silent
        println("Using GPU HSA device: $(AMDGPU.get_name(selected)) ($(repr(selected)))")
        println("Kernel parameters : <<<$(arraysize),$(TBSize)>>>")
    end

    a = ROCArray{T}(undef, arraysize)
    b = ROCArray{T}(undef, arraysize)
    c = ROCArray{T}(undef, arraysize)
    return (ROCData{T}(a, b, c, scalar, arraysize), nothing)
end
# Fill a, b and c with the first, second and third value of `init` respectively.
function init_arrays!(data::ROCData{T}, _, init::Tuple{T,T,T}) where {T}
    (init_a, init_b, init_c) = init
    AMDGPU.fill!(data.a, init_a)
    AMDGPU.fill!(data.b, init_b)
    AMDGPU.fill!(data.c, init_c)
end
# Copy: c[i] = a[i]. Launched with gridsize = data.size and waits for completion.
function copy!(data::ROCData{T}, _) where {T}
    function kernel(src::AbstractArray{T}, dst::AbstractArray{T})
        # 1-based global index; the -1 is applied to workgroupIdx only
        gid = workitemIdx().x + (workgroupIdx().x - 1) * workgroupDim().x
        @inbounds dst[gid] = src[gid]
        return
    end
    signal = @roc groupsize = TBSize gridsize = data.size kernel(data.a, data.c)
    AMDGPU.wait(signal)
end
# Mul: b[i] = scalar * c[i]. Launched with gridsize = data.size and waits for completion.
function mul!(data::ROCData{T}, _) where {T}
    function kernel(dst::AbstractArray{T}, src::AbstractArray{T}, factor::T)
        # 1-based global index; the -1 is applied to workgroupIdx only
        gid = workitemIdx().x + (workgroupIdx().x - 1) * workgroupDim().x
        @inbounds dst[gid] = factor * src[gid]
        return
    end
    signal = @roc groupsize = TBSize gridsize = data.size kernel(data.b, data.c, data.scalar)
    AMDGPU.wait(signal)
end
# Add: c[i] = a[i] + b[i]. Launched with gridsize = data.size and waits for completion.
function add!(data::ROCData{T}, _) where {T}
    function kernel(lhs::AbstractArray{T}, rhs::AbstractArray{T}, dst::AbstractArray{T})
        # 1-based global index; the -1 is applied to workgroupIdx only
        gid = workitemIdx().x + (workgroupIdx().x - 1) * workgroupDim().x
        @inbounds dst[gid] = lhs[gid] + rhs[gid]
        return
    end
    signal = @roc groupsize = TBSize gridsize = data.size kernel(data.a, data.b, data.c)
    AMDGPU.wait(signal)
end
# Triad: a[i] = b[i] + scalar * c[i]. Launched with gridsize = data.size and waits
# for completion.
function triad!(data::ROCData{T}, _) where {T}
    function kernel(
        dst::AbstractArray{T},
        b::AbstractArray{T},
        c::AbstractArray{T},
        factor::T,
    )
        # 1-based global index; the -1 is applied to workgroupIdx only
        gid = workitemIdx().x + (workgroupIdx().x - 1) * workgroupDim().x
        @inbounds dst[gid] = b[gid] + (factor * c[gid])
        return
    end
    signal = @roc groupsize = TBSize gridsize = data.size kernel(data.a, data.b, data.c, data.scalar)
    AMDGPU.wait(signal)
end
# Nstream: a[i] += b[i] + scalar * c[i]. Launched with gridsize = data.size and waits
# for completion.
function nstream!(data::ROCData{T}, _) where {T}
    function kernel(
        acc::AbstractArray{T},
        b::AbstractArray{T},
        c::AbstractArray{T},
        factor::T,
    )
        # 1-based global index; the -1 is applied to workgroupIdx only
        gid = workitemIdx().x + (workgroupIdx().x - 1) * workgroupDim().x
        @inbounds acc[gid] += b[gid] + factor * c[gid]
        return
    end
    signal = @roc groupsize = TBSize gridsize = data.size kernel(data.a, data.b, data.c, data.scalar)
    AMDGPU.wait(signal)
end
# Dot product of a and b.
# The kernel writes one partial sum per workgroup into a DotBlocks-long buffer,
# which is then reduced with `sum` to produce the returned value.
function dot(data::ROCData{T}, _) where {T}
function kernel(a::AbstractArray{T}, b::AbstractArray{T}, size::Int, partial::AbstractArray{T})
# per-workgroup scratch buffer with one slot per work-item
# (presumably backed by workgroup-local memory via alloc_local - confirm against AMDGPU.jl docs)
tb_sum = ROCDeviceArray((TBSize,), alloc_local(:reduce, T, TBSize))
local_i = workitemIdx().x
@inbounds tb_sum[local_i] = 0.0
# do dot first
# each work-item accumulates the products at i, i + TBSize * DotBlocks, ... into its own slot
i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x # only workgroupIdx starts at 1
while i <= size
@inbounds tb_sum[local_i] += a[i] * b[i]
i += TBSize * DotBlocks # XXX don't use (workgroupDim().x * gridDimWG().x) here
end
# then tree reduction
# the active range halves every pass; sync_workgroup() runs before each pass reads its neighbour's slot
offset = workgroupDim().x ÷ 2
while offset > 0
sync_workgroup()
if (local_i - 1) < offset
@inbounds tb_sum[local_i] += tb_sum[local_i+offset]
end
offset ÷= 2
end
# the first work-item of each workgroup publishes the workgroup's total
if (local_i == 1)
@inbounds partial[workgroupIdx().x] = tb_sum[local_i]
end
return
end
partial_sum = ROCArray{T}(undef, DotBlocks)
# the stride used inside the kernel (TBSize * DotBlocks) matches this gridsize
AMDGPU.wait(
@roc groupsize = TBSize gridsize = TBSize * DotBlocks kernel(
data.a,
data.b,
data.size,
partial_sum,
)
)
return sum(partial_sum)
end
# Build a VectorData from the device arrays, carrying over scalar and size.
function read_data(data::ROCData{T}, _)::VectorData{T} where {T}
    host = VectorData{T}(data.a, data.b, data.c, data.scalar, data.size)
    return host
end
main()

View File

@ -0,0 +1,152 @@
include("Stream.jl")
using CUDA
# StreamData specialised so that its arrays are CUDA CuArrays.
const CuData = StreamData{T,CuArray{T}} where {T}
# Threads per block used for every kernel launch in this file.
const TBSize = 1024::Int
# Number of blocks launched by `dot`, and the length of its partial-sum buffer.
const DotBlocks = 256::Int
# Enumerate CUDA devices as (device, "name (repr)", "CUDA.jl") tuples.
# Returns an empty list when CUDA.functional(false) reports a non-functional setup.
function devices()::Vector{DeviceWithRepr}
    if CUDA.functional(false)
        return [(dev, "$(CUDA.name(dev)) ($(repr(dev)))", "CUDA.jl") for dev in CUDA.devices()]
    end
    return DeviceWithRepr[]
end
# Select the requested CUDA device and allocate the three uninitialised device arrays.
#
# arraysize - number of elements per array; must be a multiple of TBSize
# scalar    - scalar stored in the returned data
# device    - entry from `devices()`; its first element is the device to use
# silent    - suppress the informational output when true
#
# Returns (data, nothing); this backend needs no extra context object.
function make_stream(
    arraysize::Int,
    scalar::T,
    device::DeviceWithRepr,
    silent::Bool,
)::Tuple{CuData{T},Nothing} where {T}
    arraysize % TBSize == 0 ||
        error("arraysize ($(arraysize)) must be divisible by $(TBSize)!")

    CUDA.device!(device[1])
    selected = CUDA.device()

    # show_reason is set to true here so it dumps CUDA info
    # for us regardless of whether it's functional
    CUDA.functional(true) || error("Non-functional CUDA configuration")

    if !silent
        println("Using CUDA device: $(CUDA.name(selected)) ($(repr(selected)))")
        println("Kernel parameters: <<<$(arraysize ÷ TBSize),$(TBSize)>>>")
    end

    a = CuArray{T}(undef, arraysize)
    b = CuArray{T}(undef, arraysize)
    c = CuArray{T}(undef, arraysize)
    return (CuData{T}(a, b, c, scalar, arraysize), nothing)
end
# Fill a, b and c with the first, second and third value of `init` respectively.
function init_arrays!(data::CuData{T}, _, init::Tuple{T,T,T}) where {T}
    (init_a, init_b, init_c) = init
    fill!(data.a, init_a)
    fill!(data.b, init_b)
    fill!(data.c, init_c)
end
# Copy: c[i] = a[i]. Launches data.size ÷ TBSize blocks of TBSize threads, then synchronises.
function copy!(data::CuData{T}, _) where {T}
    function kernel(src::AbstractArray{T}, dst::AbstractArray{T})
        # 1-based global thread index
        gid = threadIdx().x + (blockIdx().x - 1) * blockDim().x
        @inbounds dst[gid] = src[gid]
        return
    end
    nblocks = data.size ÷ TBSize
    @cuda blocks = nblocks threads = TBSize kernel(data.a, data.c)
    CUDA.synchronize()
end
# Mul: b[i] = scalar * c[i]. Launches data.size ÷ TBSize blocks of TBSize threads,
# then synchronises.
function mul!(data::CuData{T}, _) where {T}
    function kernel(dst::AbstractArray{T}, src::AbstractArray{T}, factor::T)
        # 1-based global thread index
        gid = threadIdx().x + (blockIdx().x - 1) * blockDim().x
        @inbounds dst[gid] = factor * src[gid]
        return
    end
    nblocks = data.size ÷ TBSize
    @cuda blocks = nblocks threads = TBSize kernel(data.b, data.c, data.scalar)
    CUDA.synchronize()
end
# Add: c[i] = a[i] + b[i]. Launches data.size ÷ TBSize blocks of TBSize threads,
# then synchronises.
function add!(data::CuData{T}, _) where {T}
    function kernel(lhs::AbstractArray{T}, rhs::AbstractArray{T}, dst::AbstractArray{T})
        # 1-based global thread index
        gid = threadIdx().x + (blockIdx().x - 1) * blockDim().x
        @inbounds dst[gid] = lhs[gid] + rhs[gid]
        return
    end
    nblocks = data.size ÷ TBSize
    @cuda blocks = nblocks threads = TBSize kernel(data.a, data.b, data.c)
    CUDA.synchronize()
end
# Triad: a[i] = b[i] + scalar * c[i]. Launches data.size ÷ TBSize blocks of TBSize
# threads, then synchronises.
function triad!(data::CuData{T}, _) where {T}
    function kernel(
        dst::AbstractArray{T},
        b::AbstractArray{T},
        c::AbstractArray{T},
        factor::T,
    )
        # 1-based global thread index
        gid = threadIdx().x + (blockIdx().x - 1) * blockDim().x
        @inbounds dst[gid] = b[gid] + (factor * c[gid])
        return
    end
    nblocks = data.size ÷ TBSize
    @cuda blocks = nblocks threads = TBSize kernel(data.a, data.b, data.c, data.scalar)
    CUDA.synchronize()
end
# Nstream: a[i] += b[i] + scalar * c[i]. Launches data.size ÷ TBSize blocks of TBSize
# threads, then synchronises.
function nstream!(data::CuData{T}, _) where {T}
    function kernel(
        acc::AbstractArray{T},
        b::AbstractArray{T},
        c::AbstractArray{T},
        factor::T,
    )
        # 1-based global thread index
        gid = threadIdx().x + (blockIdx().x - 1) * blockDim().x
        @inbounds acc[gid] += b[gid] + factor * c[gid]
        return
    end
    nblocks = data.size ÷ TBSize
    @cuda blocks = nblocks threads = TBSize kernel(data.a, data.b, data.c, data.scalar)
    CUDA.synchronize()
end
# Dot product of a and b.
# The kernel writes one partial sum per block into a DotBlocks-long buffer,
# which is then reduced with `sum` to produce the returned value.
function dot(data::CuData{T}, _) where {T}
# direct port of the reduction in CUDAStream.cu
function kernel(a::AbstractArray{T}, b::AbstractArray{T}, size::Int, partial::AbstractArray{T})
# statically sized shared-memory scratch buffer with one slot per thread of the block
tb_sum = @cuStaticSharedMem(T, TBSize)
local_i = threadIdx().x
@inbounds tb_sum[local_i] = 0.0
# do dot first
# each thread accumulates the products at i, i + blockDim * gridDim, ... into its own slot
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
while i <= size
@inbounds tb_sum[local_i] += a[i] * b[i]
i += blockDim().x * gridDim().x
end
# then tree reduction
# the active range halves every pass; sync_threads() runs before each pass reads its neighbour's slot
offset = blockDim().x ÷ 2
while offset > 0
sync_threads()
if (local_i - 1) < offset
@inbounds tb_sum[local_i] += tb_sum[local_i+offset]
end
offset ÷= 2
end
# the first thread of each block publishes the block's total
if (local_i == 1)
@inbounds partial[blockIdx().x] = tb_sum[local_i]
end
return
end
partial_sum = CuArray{T}(undef, DotBlocks)
# NOTE(review): there is no explicit CUDA.synchronize() here, unlike the other kernels;
# presumably the sum over partial_sum below orders itself after the launch - confirm
@cuda blocks = DotBlocks threads = TBSize kernel(data.a, data.b, data.size, partial_sum)
return sum(partial_sum)
end
# Build a VectorData from the device arrays, carrying over scalar and size.
function read_data(data::CuData{T}, _)::VectorData{T} where {T}
    host = VectorData{T}(data.a, data.b, data.c, data.scalar, data.size)
    return host
end
main()

View File

@ -0,0 +1,85 @@
using Distributed
@everywhere using Pkg
@everywhere Pkg.activate("."; io = devnull) # don't spam `Activating environment at...`
@everywhere include("StreamData.jl")
@everywhere include("Stream.jl")
@everywhere using SharedArrays
@everywhere const SharedArrayData = StreamData{T,SharedArray{T}} where {T}
# The only "device" is the local CPU; the description includes the current worker count.
function devices()::Vector{DeviceWithRepr}
    description = "CPU (localhost) $(nworkers())P"
    return DeviceWithRepr[(undef, description, "Distributed.jl")]
end
# Allocate the three SharedArrays used by the benchmark.
#
# arraysize - number of elements per array
# scalar    - scalar stored in the returned data
# silent    - suppress the informational output when true
# The device argument is ignored.
#
# Returns (data, nothing); this backend needs no extra context object.
function make_stream(
    arraysize::Int,
    scalar::T,
    _::DeviceWithRepr,
    silent::Bool,
)::Tuple{SharedArrayData{T},Nothing} where {T}
    silent || println("Using max $(nworkers()) process(es) + 1 master")

    a = SharedArray{T}(arraysize)
    b = SharedArray{T}(arraysize)
    c = SharedArray{T}(arraysize)
    return (SharedArrayData{T}(a, b, c, scalar, arraysize), nothing)
end
# Set every element of a, b and c to init[1], init[2] and init[3] respectively,
# splitting the index range across workers and waiting for all of them.
function init_arrays!(data::SharedArrayData{T}, _, init::Tuple{T,T,T}) where {T}
    @sync @distributed for idx in 1:data.size
        @inbounds data.a[idx] = init[1]
        @inbounds data.b[idx] = init[2]
        @inbounds data.c[idx] = init[3]
    end
end
# Copy: c[i] = a[i], distributed across workers; waits for all of them.
function copy!(data::SharedArrayData{T}, _) where {T}
    @sync @distributed for idx in 1:data.size
        @inbounds data.c[idx] = data.a[idx]
    end
end
# Mul: b[i] = scalar * c[i], distributed across workers; waits for all of them.
function mul!(data::SharedArrayData{T}, _) where {T}
    @sync @distributed for idx in 1:data.size
        @inbounds data.b[idx] = data.scalar * data.c[idx]
    end
end
# Add: c[i] = a[i] + b[i], distributed across workers; waits for all of them.
function add!(data::SharedArrayData{T}, _) where {T}
    @sync @distributed for idx in 1:data.size
        @inbounds data.c[idx] = data.a[idx] + data.b[idx]
    end
end
# Triad: a[i] = b[i] + scalar * c[i], distributed across workers; waits for all of them.
function triad!(data::SharedArrayData{T}, _) where {T}
    @sync @distributed for idx in 1:data.size
        @inbounds data.a[idx] = data.b[idx] + (data.scalar * data.c[idx])
    end
end
# Nstream: a[i] += b[i] + scalar * c[i], distributed across workers; waits for all of them.
function nstream!(data::SharedArrayData{T}, _) where {T}
    @sync @distributed for idx in 1:data.size
        @inbounds data.a[idx] += data.b[idx] + data.scalar * data.c[idx]
    end
end
# Dot product of a and b: the per-index products are combined with (+) by @distributed.
function dot(data::SharedArrayData{T}, _) where {T}
    total = @distributed (+) for idx in 1:data.size
        @inbounds data.a[idx] * data.b[idx]
    end
    return total
end
# Build a VectorData from the shared arrays, carrying over scalar and size.
function read_data(data::SharedArrayData{T}, _)::VectorData{T} where {T}
    host = VectorData{T}(data.a, data.b, data.c, data.scalar, data.size)
    return host
end
main()

View File

@ -0,0 +1,4 @@
# Intentionally empty module: it defines nothing and exports nothing.
# NOTE(review): presumably present only so the `JuliaStream` project has a package
# entry point - confirm.
module JuliaStream
end
# Runs whenever this file is loaded and points users at the runnable benchmark scripts.
println("Please run benchmarks directly via `julia --project src/<IMPL>Stream.jl`")

View File

@ -0,0 +1,255 @@
using ROCKernels, CUDAKernels, KernelAbstractions, CUDA, AMDGPU
include("Stream.jl")
# StreamData specialisations for the two GPU array types.
const CuData = StreamData{T,CUDA.CuArray{T}} where {T}
const ROCData = StreamData{T,AMDGPU.ROCArray{T}} where {T}
# Workgroup size passed to every kernel constructor in this file.
const TBSize = 1024::Int
# Length of the partial-sum buffer in `dot`; `dot` uses ndrange = TBSize * DotBlocks.
const DotBlocks = 256::Int
# Backend tag stored as the third element of each device tuple and in Context.
@enum Backend cuda rocm cpu
# Execution context returned by make_stream and passed to every benchmark function.
struct Context
# which backend the data was allocated for (cuda, rocm or cpu)
backend::Backend
# device object given to the kernel constructors: CPU(), CUDADevice() or ROCDevice()
device::Device
end
# Enumerate AMD GPU agents as (agent, repr, rocm) tuples.
# Any failure while querying the runtime yields an empty list.
function list_rocm_devices()::Vector{DeviceWithRepr}
    try
        # the order in which agents are reported isn't stable, so sort by their repr
        agents = sort(AMDGPU.get_agents(:gpu), by = repr)
        return [(agent, repr(agent), rocm) for agent in agents]
    catch
        # probably unsupported
        return DeviceWithRepr[]
    end
end
# Enumerate CUDA devices as (device, "name (repr)", cuda) tuples.
# Returns an empty list when CUDA.functional(false) reports a non-functional setup.
function list_cuda_devices()::Vector{DeviceWithRepr}
    if CUDA.functional(false)
        return [(dev, "$(CUDA.name(dev)) ($(repr(dev)))", cuda) for dev in CUDA.devices()]
    end
    return DeviceWithRepr[]
end
# All selectable devices: the host CPU first, then CUDA devices, then ROCm agents.
function devices()::Vector{DeviceWithRepr}
    cuda_entries = list_cuda_devices()
    rocm_entries = list_rocm_devices()
    cpu_label = "$(Sys.cpu_info()[1].model) ($(Threads.nthreads())T)"
    cpu_entries = [(undef, cpu_label, cpu)]
    return vcat(cpu_entries, cuda_entries, rocm_entries)
end
# Allocate the benchmark arrays on the backend named by `device` and build the
# Context used by the kernels.
#
# arraysize - number of elements per array; must be a multiple of TBSize
# scalar    - scalar stored in the returned data
# device    - (handle, description, backend) entry from `devices()`
# silent    - suppress the informational output when true
#
# Returns (data, Context(backend, backenddevice)).
# Raises an error for an indivisible arraysize, a device that could not be selected,
# a non-functional CUDA setup, or an unknown backend.
#
# The partial-sum buffer for `dot` is allocated inside `dot` itself, so nothing is
# pre-allocated for it here.
function make_stream(
    arraysize::Int,
    scalar::T,
    device::DeviceWithRepr,
    silent::Bool,
) where {T}
    if arraysize % TBSize != 0
        error("arraysize ($(arraysize)) must be divisible by $(TBSize)!")
    end

    (selected, _, backend) = device
    if backend == cpu
        if !silent
            println("Using CPU with max $(Threads.nthreads()) threads")
        end
        data = VectorData{T}(
            Vector{T}(undef, arraysize),
            Vector{T}(undef, arraysize),
            Vector{T}(undef, arraysize),
            scalar,
            arraysize,
        )
        backenddevice = CPU()
    elseif backend == cuda
        CUDA.device!(selected)
        # verify that the switch actually took effect
        if CUDA.device() != selected
            error("Cannot select CUDA device, expecting $selected, but got $(CUDA.device())")
        end
        if !CUDA.functional(true)
            error("Non-functional CUDA configuration")
        end
        if !silent
            println("Using CUDA device: $(CUDA.name(selected)) ($(repr(selected)))")
        end
        data = CuData{T}(
            CuArray{T}(undef, arraysize),
            CuArray{T}(undef, arraysize),
            CuArray{T}(undef, arraysize),
            scalar,
            arraysize,
        )
        backenddevice = CUDADevice()
    elseif backend == rocm
        # AMDGPU has no setter for the default agent, so assign DEFAULT_AGENT directly
        AMDGPU.DEFAULT_AGENT[] = selected
        if AMDGPU.get_default_agent() != selected
            error(
                "Cannot select HSA device, expecting $selected, but got $(AMDGPU.get_default_agent())",
            )
        end
        if !silent
            println("Using GPU HSA device: $(AMDGPU.get_name(selected)) ($(repr(selected)))")
        end
        data = ROCData{T}(
            ROCArray{T}(undef, arraysize),
            ROCArray{T}(undef, arraysize),
            ROCArray{T}(undef, arraysize),
            scalar,
            arraysize,
        )
        backenddevice = ROCDevice()
    else
        error("unsupported backend $(backend)")
    end

    if !silent
        println("Kernel parameters : <<<$(data.size),$(TBSize)>>>")
    end
    return (data, Context(backend, backenddevice))
end
# Set every element of a, b and c to init[1], init[2] and init[3] respectively,
# using a threaded loop on the CPU backend and `fill!` on the GPU backends.
#
# Raises an error naming the backend when context.backend is not cpu, cuda or rocm.
function init_arrays!(
    data::StreamData{T,C},
    context::Context,
    init::Tuple{T,T,T},
) where {T,C}
    if context.backend == cpu
        Threads.@threads for i = 1:data.size
            @inbounds data.a[i] = init[1]
            @inbounds data.b[i] = init[2]
            @inbounds data.c[i] = init[3]
        end
    elseif context.backend == cuda
        CUDA.fill!(data.a, init[1])
        CUDA.fill!(data.b, init[2])
        CUDA.fill!(data.c, init[3])
    elseif context.backend == rocm
        AMDGPU.fill!(data.a, init[1])
        AMDGPU.fill!(data.b, init[2])
        AMDGPU.fill!(data.c, init[3])
    else
        # the backend lives on the context; there is no local named `backend` here
        error("unsupported backend $(context.backend)")
    end
end
# Copy: c[i] = a[i] over ndrange = data.size; waits for the kernel's event.
function copy!(data::StreamData{T,C}, context::Context) where {T,C}
    @kernel function kernel(@Const(a::AbstractArray{T}), c)
        idx = @index(Global)
        @inbounds c[idx] = a[idx]
    end
    launch = kernel(context.device, TBSize)
    wait(launch(data.a, data.c, ndrange = data.size))
end
# Mul: b[i] = scalar * c[i] over ndrange = data.size; waits for the kernel's event.
function mul!(data::StreamData{T,C}, context::Context) where {T,C}
    @kernel function kernel(b::AbstractArray{T}, @Const(c::AbstractArray{T}), scalar::T)
        idx = @index(Global)
        @inbounds b[idx] = scalar * c[idx]
    end
    launch = kernel(context.device, TBSize)
    wait(launch(data.b, data.c, data.scalar, ndrange = data.size))
end
# Add: c[i] = a[i] + b[i] over ndrange = data.size; waits for the kernel's event.
function add!(data::StreamData{T,C}, context::Context) where {T,C}
    @kernel function kernel(@Const(a::AbstractArray{T}), @Const(b::AbstractArray{T}), c)
        idx = @index(Global)
        @inbounds c[idx] = a[idx] + b[idx]
    end
    launch = kernel(context.device, TBSize)
    wait(launch(data.a, data.b, data.c, ndrange = data.size))
end
# Triad: a[i] = b[i] + scalar * c[i] over ndrange = data.size; waits for the kernel's event.
function triad!(data::StreamData{T,C}, context::Context) where {T,C}
    @kernel function kernel(a::AbstractArray{T}, @Const(b::AbstractArray{T}), @Const(c), scalar::T)
        idx = @index(Global)
        @inbounds a[idx] = b[idx] + (scalar * c[idx])
    end
    launch = kernel(context.device, TBSize)
    wait(launch(data.a, data.b, data.c, data.scalar, ndrange = data.size))
end
# Nstream: a[i] += b[i] + scalar * c[i] over ndrange = data.size; waits for the kernel's event.
function nstream!(data::StreamData{T,C}, context::Context) where {T,C}
    @kernel function kernel(a::AbstractArray{T}, @Const(b::AbstractArray{T}), @Const(c), scalar::T)
        idx = @index(Global)
        @inbounds a[idx] += b[idx] + scalar * c[idx]
    end
    launch = kernel(context.device, TBSize)
    wait(launch(data.a, data.b, data.c, data.scalar, ndrange = data.size))
end
# Dot product of a and b.
# The kernel writes one partial sum per workgroup into a DotBlocks-long buffer
# allocated for the active backend; the buffer is then reduced with `sum`.
#
# Raises an error naming the backend when context.backend is not cpu, cuda or rocm.
function dot(data::StreamData{T,C}, context::Context) where {T,C}
    @kernel function kernel(@Const(a::AbstractArray{T}), @Const(b::AbstractArray{T}), size::Int, partial::AbstractArray{T})
        local_i = @index(Local)
        group_i = @index(Group)
        # workgroup-local scratch buffer with one slot per work-item
        tb_sum = @localmem T TBSize
        @inbounds tb_sum[local_i] = 0.0

        # do dot first
        # the stride TBSize * DotBlocks equals the ndrange used for the launch below
        i = @index(Global)
        while i <= size
            @inbounds tb_sum[local_i] += a[i] * b[i]
            i += TBSize * DotBlocks
        end

        # then tree reduction
        # FIXME this does not compile when targeting CPUs:
        # see https://github.com/JuliaGPU/KernelAbstractions.jl/issues/262
        offset = @private Int64 (1,)
        @inbounds begin
            offset[1] = @groupsize()[1] ÷ 2
            while offset[1] > 0
                @synchronize
                if (local_i - 1) < offset[1]
                    tb_sum[local_i] += tb_sum[local_i+offset[1]]
                end
                offset[1] ÷= 2
            end
        end

        # the first work-item of each group publishes the group's total
        if (local_i == 1)
            @inbounds partial[group_i] = tb_sum[local_i]
        end
    end

    if context.backend == cpu
        partial_sum = Vector{T}(undef, DotBlocks)
    elseif context.backend == cuda
        partial_sum = CuArray{T}(undef, DotBlocks)
    elseif context.backend == rocm
        partial_sum = ROCArray{T}(undef, DotBlocks)
    else
        # the backend lives on the context; there is no local named `backend` here
        error("unsupported backend $(context.backend)")
    end

    wait(
        kernel(context.device, TBSize)(
            data.a,
            data.b,
            data.size,
            partial_sum,
            ndrange = TBSize * DotBlocks,
        ),
    )
    return sum(partial_sum)
end
# Build a VectorData from the backend arrays, carrying over scalar and size.
function read_data(data::StreamData{T,C}, _::Context)::VectorData{T} where {T,C}
    host = VectorData{T}(data.a, data.b, data.c, data.scalar, data.size)
    return host
end
main()

View File

@ -0,0 +1,75 @@
include("Stream.jl")
# The plain implementation has a single "device": the host CPU.
# Each entry is (device handle, description, implementation label); there is no
# handle to select, so `undef` is used as a placeholder.
function devices()::Vector{DeviceWithRepr}
    return [(undef, "CPU", "Plain")]
end
# Allocate three uninitialised host Vectors of `arraysize` elements.
# The device and `silent` arguments are not used.
#
# Returns (data, nothing); this backend needs no extra context object.
function make_stream(
    arraysize::Int,
    scalar::T,
    _::DeviceWithRepr,
    silent::Bool,
)::Tuple{VectorData{T},Nothing} where {T}
    a = Vector{T}(undef, arraysize)
    b = Vector{T}(undef, arraysize)
    c = Vector{T}(undef, arraysize)
    return (VectorData{T}(a, b, c, scalar, arraysize), nothing)
end
# Set every element of a, b and c to init[1], init[2] and init[3] respectively.
function init_arrays!(data::VectorData{T}, _, init::Tuple{T,T,T}) where {T}
    for idx in 1:data.size
        @inbounds data.a[idx] = init[1]
        @inbounds data.b[idx] = init[2]
        @inbounds data.c[idx] = init[3]
    end
end
# Copy: c[i] = a[i] in a single sequential loop.
function copy!(data::VectorData{T}, _) where {T}
    for idx in 1:data.size
        @inbounds data.c[idx] = data.a[idx]
    end
end
# Mul: b[i] = scalar * c[i] in a single sequential loop.
function mul!(data::VectorData{T}, _) where {T}
    for idx in 1:data.size
        @inbounds data.b[idx] = data.scalar * data.c[idx]
    end
end
# Add: c[i] = a[i] + b[i] in a single sequential loop.
function add!(data::VectorData{T}, _) where {T}
    for idx in 1:data.size
        @inbounds data.c[idx] = data.a[idx] + data.b[idx]
    end
end
# Triad: a[i] = b[i] + scalar * c[i] in a single sequential loop.
function triad!(data::VectorData{T}, _) where {T}
    for idx in 1:data.size
        @inbounds data.a[idx] = data.b[idx] + (data.scalar * data.c[idx])
    end
end
# Nstream: a[i] += b[i] + scalar * c[i] in a single sequential loop.
function nstream!(data::VectorData{T}, _) where {T}
    for idx in 1:data.size
        @inbounds data.a[idx] += data.b[idx] + data.scalar * data.c[idx]
    end
end
# Dot kernel: sequentially accumulates a[i] * b[i] over 1:data.size and returns the total.
function dot(data::VectorData{T}, _) where {T}
  acc = zero(T)
  for idx in 1:data.size
    @inbounds acc += data.a[idx] * data.b[idx]
  end
  return acc
end
# The data already lives in host vectors, so it is handed back unchanged.
read_data(data::VectorData{T}, _)::VectorData{T} where {T} = data
main()

View File

@ -0,0 +1,300 @@
using ArgParse
using Parameters
using Printf
using Base: Float64, Int
include("StreamData.jl")
const VectorData = StreamData{T,Vector{T}} where {T}
const DeviceWithRepr = Tuple{Any,String,Any}
# Per-kernel wall-clock samples (seconds), one vector per kernel.
# `Timings(n)` creates five independent zero-filled vectors of length n.
struct Timings
  copy::Vector{Float64}
  mul::Vector{Float64}
  add::Vector{Float64}
  triad::Vector{Float64}
  dot::Vector{Float64}
  function Timings(n)
    blank() = zeros(n)
    return new(blank(), blank(), blank(), blank(), blank())
  end
end
@enum Benchmark All Triad Nstream
# Run copy, mul, add, triad and dot in that order `times` times, timing each call.
# Returns the collected timings together with the result of the last dot call
# (zero when `times` is 0).
function run_all!(data::StreamData{T,C}, context, times::Int)::Tuple{Timings,T} where {T,C}
  samples = Timings(times)
  last_dot::T = 0
  for iter in 1:times
    @inbounds samples.copy[iter] = @elapsed copy!(data, context)
    @inbounds samples.mul[iter] = @elapsed mul!(data, context)
    @inbounds samples.add[iter] = @elapsed add!(data, context)
    @inbounds samples.triad[iter] = @elapsed triad!(data, context)
    @inbounds samples.dot[iter] = @elapsed last_dot = dot(data, context)
  end
  return (samples, last_dot)
end
# Run the triad kernel `times` times back to back and return the total elapsed seconds.
function run_triad!(data::StreamData{T,C}, context, times::Int)::Float64 where {T,C}
  total_seconds = @elapsed begin
    for _ in 1:times
      triad!(data, context)
    end
  end
  return total_seconds
end
# Run the nstream kernel `times` times and return the elapsed seconds of each run.
function run_nstream!(
  data::StreamData{T,C},
  context,
  times::Int,
)::Vector{Float64} where {T,C}
  samples = zeros(Float64, times)
  iter = 1
  while iter <= times
    @inbounds samples[iter] = @elapsed nstream!(data, context)
    iter += 1
  end
  return samples
end
# Validate the arrays (and optionally the dot result) against values computed on scalars.
#
# The expected ("gold") values are obtained by replaying the selected benchmark
# `times` times on the three initial values. Each array is accepted when the mean
# absolute deviation from its gold value is within eps(T) * 100; the dot result
# (skipped when `dot === nothing`) is accepted when its relative error is within 1.0e-8.
# Prints a message for every failed check and returns true only if all checks pass.
#
# Note: no local variable is named `error` here. Assigning to such a local anywhere in
# the function would shadow Base.error for the whole body and break the error call below.
function check_solutions(
  data::StreamData{T,C},
  times::Int,
  init::Tuple{T,T,T},
  benchmark::Benchmark,
  dot::Union{T,Nothing},
) where {T,C}
  (gold_a, gold_b, gold_c) = init
  for _ = 1:times
    if benchmark == All
      gold_c = gold_a
      gold_b = data.scalar * gold_c
      gold_c = gold_a + gold_b
      gold_a = gold_b + data.scalar * gold_c
    elseif benchmark == Triad
      gold_a = gold_b + data.scalar * gold_c
    elseif benchmark == Nstream
      gold_a += gold_b + data.scalar * gold_c
    else
      error("Unknown benchmark $(benchmark)")
    end
  end

  tolerance = eps(T) * 100

  # Mean absolute deviation of xs from the expected value; true when within tolerance.
  function validate_xs(name::String, xs::AbstractArray{T}, from::T)
    isempty(xs) && return true # nothing to compare, avoid 0/0
    avg_err = (map(x -> abs(x - from), xs) |> sum) / length(xs)
    # `<=` (rather than negating `>`) so that a NaN deviation counts as a failure
    ok = avg_err <= tolerance
    if !ok
      println("Validation failed on $name. Average error $avg_err")
    end
    return ok
  end

  a_valid = validate_xs("a", data.a, gold_a)
  b_valid = validate_xs("b", data.b, gold_b)
  c_valid = validate_xs("c", data.c, gold_c)

  dot_valid = true
  if dot !== nothing
    gold_sum = gold_a * gold_b * data.size
    rel_err = abs((dot - gold_sum) / gold_sum)
    # as above: a NaN relative error must not pass validation
    dot_valid = rel_err <= 1.0e-8
    if !dot_valid
      println(
        "Validation failed on sum. Error $rel_err \nSum was $dot but should be $gold_sum",
      )
    end
  end

  return a_valid && b_valid && c_valid && dot_valid
end
# Benchmark settings with their defaults.
# parse_options overwrites these fields by name from the parsed command line,
# so the field names must match the long option names declared there.
@with_kw mutable struct Config
list::Bool = false # list devices and exit
device::Int = 1 # 1-based index into devices()
numtimes::Int = 100 # number of repetitions of each kernel
arraysize::Int = 33554432 # number of elements per array
float::Bool = false # use Float32 instead of Float64
triad_only::Bool = false # run only the triad kernel
nstream_only::Bool = false # run only the nstream kernel
csv::Bool = false # print results as CSV
mibibytes::Bool = false # report sizes/bandwidth with 2^20 / 2^30 units
end
# Parse the command line and store every parsed option into `given` (mutated in place).
# The current values of `given` are used as defaults for the options that take a value.
# Each parsed key is written with setproperty!, so option names must equal Config field names.
function parse_options(given::Config)
s = ArgParseSettings()
@add_arg_table s begin
"--list"
help = "List available devices"
action = :store_true
"--device", "-d"
help = "Select device at DEVICE, NOTE: Julia is 1-indexed"
arg_type = Int
default = given.device
"--numtimes", "-n"
help = "Run the test NUMTIMES times (NUM >= 2)"
arg_type = Int
default = given.numtimes
"--arraysize", "-s"
help = "Use ARRAYSIZE elements in the array"
arg_type = Int
default = given.arraysize
"--float"
help = "Use floats (rather than doubles)"
action = :store_true
"--triad_only"
help = "Only run triad"
action = :store_true
"--nstream_only"
help = "Only run nstream"
action = :store_true
"--csv"
help = "Output as csv table"
action = :store_true
"--mibibytes"
help = "Use MiB=2^20 for bandwidth calculation (default MB=10^6)"
action = :store_true
end
args = parse_args(s)
# surely there's a better way than doing this:
# copy each parsed (name, value) pair onto the Config field of the same name
for (arg, val) in args
setproperty!(given, Symbol(arg), val)
end
end
const DefaultInit = (0.1, 0.2, 0.0)
const DefaultScalar = 0.4
const Version = "3.4.0"
# Entry point shared by every backend.
#
# Parses the command line, optionally lists the devices, selects the device and the
# benchmark, allocates and initialises the arrays through the backend functions
# (devices, make_stream, init_arrays!, ...), runs the benchmark with the GC disabled,
# validates the results and prints a table. Exits with status 1 when validation fails.
function main()
  config::Config = Config()
  parse_options(config)

  if config.list
    for (i, (_, name, impl)) in enumerate(devices())
      println("[$i] ($impl) $name")
    end
    exit(0)
  end

  ds = devices()
  # TODO implement substring device match
  if config.device < 1 || config.device > length(ds)
    error(
      "Device $(config.device) out of range (1..$(length(ds))), NOTE: Julia is 1-indexed",
    )
  end
  device = ds[config.device]

  type = config.float ? Float32 : Float64

  if config.nstream_only && config.triad_only
    error("Both triad and nstream are enabled, pick one or omit both to run all benchmarks")
  elseif config.nstream_only
    benchmark = Nstream
  elseif config.triad_only
    benchmark = Triad
  else
    benchmark = All
  end

  array_bytes = config.arraysize * sizeof(type)
  total_bytes = array_bytes * 3
  (mega_scale, mega_suffix, giga_scale, giga_suffix) =
    !config.mibibytes ? (1.0e-6, "MB", 1.0e-9, "GB") : (2^-20, "MiB", 2^-30, "GiB")

  if !config.csv
    println("BabelStream")
    println("Version: $Version")
    println("Implementation: Julia; $(PROGRAM_FILE)")
    println("Running kernels $(config.numtimes) times")
    if benchmark == Triad
      println("Number of elements: $(config.arraysize)")
    end
    println("Precision: $(config.float ? "float" : "double")")
    r1 = n -> round(n; digits = 1)
    println(
      "Array size: $(r1(mega_scale * array_bytes)) $mega_suffix(=$(r1(giga_scale * array_bytes)) $giga_suffix)",
    )
    println(
      "Total size: $(r1(mega_scale * total_bytes)) $mega_suffix(=$(r1(giga_scale * total_bytes)) $giga_suffix)",
    )
  end

  # Summarise the samples of one kernel as a list of (column name, value) pairs.
  # `bytes` is the number of bytes the kernel moves per run.
  function mk_row(xs::Vector{Float64}, name::String, bytes::Int)
    # The first run is treated as warm-up and left out of the statistics.
    # (Base.rest(xs) without a start index returns the whole vector, so it cannot be
    # used for this.) Fall back to all samples when there is only one.
    samples = length(xs) > 1 ? xs[2:end] : xs
    fastest = minimum(samples)
    slowest = maximum(samples)
    mean = sum(samples) / length(samples)
    mbps = mega_scale * bytes / fastest
    if config.csv
      return Tuple{String,Any}[
        ("function", name),
        ("num_times", config.numtimes),
        ("n_elements", config.arraysize),
        ("sizeof", bytes),
        ("max_m$( config.mibibytes ? "i" : "")bytes_per_sec", mbps),
        ("min_runtime", fastest),
        ("max_runtime", slowest),
        ("avg_runtime", mean),
      ]
    else
      return Tuple{String,Any}[
        ("Function", name),
        ("M$(config.mibibytes ? "i" : "")Bytes/sec", round(mbps; digits = 3)),
        ("Min (sec)", round(fastest; digits = 5)),
        ("Max", round(slowest; digits = 5)),
        ("Average", round(mean; digits = 5)),
      ]
    end
  end

  # Print a header taken from the first row, then one line per row.
  function tabulate(rows::Vector{Tuple{String,Any}}...)
    header = first(rows)
    padding = config.csv ? 0 : 12
    sep = config.csv ? "," : ""
    map(x -> rpad(x[1], padding), header) |> x -> join(x, sep) |> println
    for row in rows
      map(x -> rpad(x[2], padding), row) |> x -> join(x, sep) |> println
    end
  end

  init::Tuple{type,type,type} = DefaultInit
  scalar::type = DefaultScalar

  GC.enable(false)

  (data, context) = make_stream(config.arraysize, scalar, device, config.csv)
  init_arrays!(data, context, init)

  if benchmark == All
    # named dot_sum (not `sum`) so that Base.sum stays usable inside the closures above
    (timings, dot_sum) = run_all!(data, context, config.numtimes)
    valid =
      check_solutions(read_data(data, context), config.numtimes, init, benchmark, dot_sum)
    tabulate(
      mk_row(timings.copy, "Copy", 2 * array_bytes),
      mk_row(timings.mul, "Mul", 2 * array_bytes),
      mk_row(timings.add, "Add", 3 * array_bytes),
      mk_row(timings.triad, "Triad", 3 * array_bytes),
      mk_row(timings.dot, "Dot", 2 * array_bytes),
    )
  elseif benchmark == Nstream
    timings = run_nstream!(data, context, config.numtimes)
    valid =
      check_solutions(read_data(data, context), config.numtimes, init, benchmark, nothing)
    tabulate(mk_row(timings, "Nstream", 4 * array_bytes))
  elseif benchmark == Triad
    elapsed = run_triad!(data, context, config.numtimes)
    valid =
      check_solutions(read_data(data, context), config.numtimes, init, benchmark, nothing)
    triad_bytes = 3 * array_bytes * config.numtimes
    # the label below uses giga_suffix, so the value must be scaled with giga_scale
    bandwidth = giga_scale * (triad_bytes / elapsed)
    println("Runtime (seconds): $(round(elapsed; digits=5))")
    println("Bandwidth ($giga_suffix/s): $(round(bandwidth; digits=3)) ")
  else
    error("Bad benchmark $(benchmark)")
  end

  GC.enable(true)

  if !valid
    exit(1)
  end
end

View File

@ -0,0 +1,7 @@
# Benchmark state shared by all backends.
# T is the element type and C the array container type holding elements of T.
struct StreamData{T,C<:AbstractArray{T}}
a::C # work array a
b::C # work array b
c::C # work array c
scalar::T # multiplier used by the mul/triad/nstream kernels
size::Int # number of elements the kernels iterate over
end

View File

@ -0,0 +1,112 @@
include("Stream.jl")
# List the single CPU "device" of the threaded backend.
# The display name is the model of the first CPU reported by Sys.cpu_info()
# followed by the number of Julia threads.
function devices()::Vector{DeviceWithRepr}
  label = "$(Sys.cpu_info()[1].model) ($(Threads.nthreads())T)"
  return [(undef, label, "Threaded")]
end
# Allocate the three uninitialised work arrays of `arraysize` elements and bundle
# them with `scalar`. Prints the thread count unless `silent`; the device argument
# is unused and the returned context is `nothing`.
function make_stream(
  arraysize::Int,
  scalar::T,
  _::DeviceWithRepr,
  silent::Bool,
)::Tuple{VectorData{T},Nothing} where {T}
  silent || println("Using max $(Threads.nthreads()) threads")
  alloc() = Vector{T}(undef, arraysize)
  stream = VectorData{T}(alloc(), alloc(), alloc(), scalar, arraysize)
  return (stream, nothing)
end
# Fill the first `data.size` elements of a, b and c with init[1], init[2] and init[3],
# splitting the index range across threads.
function init_arrays!(data::VectorData{T}, _, init::Tuple{T,T,T}) where {T}
  (a0, b0, c0) = init
  Threads.@threads for idx in 1:data.size
    @inbounds data.a[idx] = a0
    @inbounds data.b[idx] = b0
    @inbounds data.c[idx] = c0
  end
end
# Threaded copy kernel: c[i] = a[i] for i in 1:data.size.
function copy!(data::VectorData{T}, _) where {T}
  src, dst = data.a, data.c
  Threads.@threads for idx in 1:data.size
    @inbounds dst[idx] = src[idx]
  end
end
# Threaded mul kernel: b[i] = scalar * c[i] for i in 1:data.size.
function mul!(data::VectorData{T}, _) where {T}
  src, dst = data.c, data.b
  Threads.@threads for idx in 1:data.size
    @inbounds dst[idx] = data.scalar * src[idx]
  end
end
# Threaded add kernel: c[i] = a[i] + b[i] for i in 1:data.size.
function add!(data::VectorData{T}, _) where {T}
  lhs, rhs, out = data.a, data.b, data.c
  Threads.@threads for idx in 1:data.size
    @inbounds out[idx] = lhs[idx] + rhs[idx]
  end
end
# Threaded triad kernel: a[i] = b[i] + scalar * c[i] for i in 1:data.size.
function triad!(data::VectorData{T}, _) where {T}
  out, x, y = data.a, data.b, data.c
  Threads.@threads for idx in 1:data.size
    @inbounds out[idx] = x[idx] + (data.scalar * y[idx])
  end
end
# Threaded nstream kernel: a[i] += b[i] + scalar * c[i] for i in 1:data.size.
function nstream!(data::VectorData{T}, _) where {T}
  out, x, y = data.a, data.b, data.c
  Threads.@threads for idx in 1:data.size
    @inbounds out[idx] += x[idx] + data.scalar * y[idx]
  end
end
# Threads.@threads/Threads.@spawn doesn't support OpenMP's firstprivate, etc
# Split 1:range into `n` contiguous slices and run `f(group, startidx, endidx)` once
# per slice, each in its own sticky task assigned to thread `group` (1-based), then
# wait for all of them.
#
# The first `range % n` slices get one extra element so the slices cover 1:range
# exactly. A slice may be empty (startidx > endidx) when range < n.
# `n` is expected to be at most Threads.nthreads(), since task `group` is bound to
# thread id `group - 1` through the runtime's jl_set_task_tid.
function static_par_ranged(f::Function, range::Int, n::Int)
  base_width = range ÷ n
  extra = range % n
  # exactly one (offset, width) pair per group: 0-based group ids 0..n-1
  slices = map(0:(n-1)) do g
    width = base_width + (g < extra ? 1 : 0)
    offset =
      g < extra ? (base_width + 1) * g : ((base_width + 1) * extra) + (base_width * (g - extra))
    (offset, width)
  end
  ccall(:jl_enter_threaded_region, Cvoid, ())
  try
    tasks = map(1:n) do group
      (offset, width) = slices[group]
      task = Task(() -> f(group, offset + 1, offset + width))
      task.sticky = true
      ccall(:jl_set_task_tid, Cvoid, (Any, Cint), task, group - 1) # ccall, so 0-based for group
      schedule(task)
    end
    foreach(wait, tasks)
  finally
    ccall(:jl_exit_threaded_region, Cvoid, ())
  end
end
# Threaded dot kernel: every group accumulates a[i] * b[i] over its own index slice
# into a private accumulator, stores it in its slot of `partial`, and the slots are
# summed at the end.
function dot(data::VectorData{T}, _) where {T}
  nworkers = Threads.nthreads()
  partial = Vector{T}(undef, nworkers)
  static_par_ranged(data.size, nworkers) do group, startidx, endidx
    local_sum = zero(T)
    @simd for idx = startidx:endidx
      @inbounds local_sum += data.a[idx] * data.b[idx]
    end
    @inbounds partial[group] = local_sum
  end
  return sum(partial)
  # This doesn't do well on aarch64 because of the excessive Threads.threadid() ccall
  # and inhibited vectorisation from the lack of @simd
  # partial = zeros(T, Threads.nthreads())
  # Threads.@threads for i = 1:data.size
  # @inbounds partial[Threads.threadid()] += (data.a[i] * data.b[i])
  # end
  # return sum(partial)
end
# The data already lives in host vectors, so it is handed back unchanged.
read_data(data::VectorData{T}, _)::VectorData{T} where {T} = data
main()

View File

@ -0,0 +1,170 @@
using Base.Iterators: println
using Base.Iterators: println
using Printf: Iterators
include("Stream.jl")
using oneAPI
const oneData = StreamData{T,oneArray{T}} where {T}
const DotWGSize = 256::Int
# List every device of every Level Zero driver as
# (device handle, plain-text description, implementation name).
function devices()::Vector{DeviceWithRepr}
  found = map(oneL0.devices, oneL0.drivers()) |> Iterators.flatten |> collect
  map(dev -> (dev, repr("text/plain", dev), "oneAPI.jl"), found)
end
# Select the given device, derive the work-group size and allocate the device arrays.
# The group size is the smaller of the device's maxTotalGroupSize and `arraysize`;
# `arraysize` must be a multiple of it. The group size is returned as the context
# that is passed to every kernel launcher.
function make_stream(
  arraysize::Int,
  scalar::T,
  device::DeviceWithRepr,
  silent::Bool,
)::Tuple{oneData{T},Int} where {T}
  oneAPI.allowscalar(false)
  target = device[1]
  oneAPI.device!(target)
  limits = oneL0.compute_properties(oneAPI.device())
  groupsize = min(limits.maxTotalGroupSize, arraysize)
  arraysize % groupsize == 0 ||
    error("arraysize ($(arraysize)) must be divisible by $(groupsize)!")
  if !silent
    println("Using L0 device: $(repr("text/plain",target))")
    println("Kernel parameters : <<<$(arraysize),$(groupsize)>>>")
  end
  alloc() = oneArray{T}(undef, arraysize)
  stream = oneData{T}(alloc(), alloc(), alloc(), scalar, arraysize)
  return (stream, groupsize)
end
# Fill a, b and c with init[1], init[2] and init[3] respectively.
function init_arrays!(data::oneData{T}, _, init::Tuple{T,T,T}) where {T}
  (a0, b0, c0) = init
  oneAPI.fill!(data.a, a0)
  oneAPI.fill!(data.b, b0)
  return oneAPI.fill!(data.c, c0)
end
# Copy kernel: c[i] = a[i], one work-item per element, then wait for completion.
function copy!(data::oneData{T}, groupsize::Int) where {T}
  function kernel(a::AbstractArray{T}, c::AbstractArray{T})
    gid = get_global_id()
    @inbounds c[gid] = a[gid]
    return
  end
  ngroups = data.size ÷ groupsize
  @oneapi items = groupsize groups = ngroups kernel(data.a, data.c)
  oneAPI.synchronize()
end
# Mul kernel: b[i] = scalar * c[i], one work-item per element, then wait for completion.
function mul!(data::oneData{T}, groupsize::Int) where {T}
  function kernel(b::AbstractArray{T}, c::AbstractArray{T}, scalar::T)
    gid = get_global_id()
    @inbounds b[gid] = scalar * c[gid]
    return
  end
  ngroups = data.size ÷ groupsize
  @oneapi items = groupsize groups = ngroups kernel(data.b, data.c, data.scalar)
  oneAPI.synchronize()
end
# Add kernel: c[i] = a[i] + b[i], one work-item per element, then wait for completion.
function add!(data::oneData{T}, groupsize::Int) where {T}
  function kernel(a::AbstractArray{T}, b::AbstractArray{T}, c::AbstractArray{T})
    gid = get_global_id()
    @inbounds c[gid] = a[gid] + b[gid]
    return
  end
  ngroups = data.size ÷ groupsize
  @oneapi items = groupsize groups = ngroups kernel(data.a, data.b, data.c)
  oneAPI.synchronize()
end
# Triad kernel: a[i] = b[i] + scalar * c[i], one work-item per element, then wait.
function triad!(data::oneData{T}, groupsize::Int) where {T}
  function kernel(a::AbstractArray{T}, b::AbstractArray{T}, c::AbstractArray{T}, scalar::T)
    gid = get_global_id()
    @inbounds a[gid] = b[gid] + (scalar * c[gid])
    return
  end
  ngroups = data.size ÷ groupsize
  @oneapi items = groupsize groups = ngroups kernel(data.a, data.b, data.c, data.scalar)
  oneAPI.synchronize()
end
# Nstream kernel: a[i] += b[i] + scalar * c[i], one work-item per element, then wait.
function nstream!(data::oneData{T}, groupsize::Int) where {T}
  function kernel(a::AbstractArray{T}, b::AbstractArray{T}, c::AbstractArray{T}, scalar::T)
    gid = get_global_id()
    @inbounds a[gid] += b[gid] + scalar * c[gid]
    return
  end
  ngroups = data.size ÷ groupsize
  @oneapi items = groupsize groups = ngroups kernel(data.a, data.b, data.c, data.scalar)
  oneAPI.synchronize()
end
# Dot kernel: each work-item accumulates a[i] * b[i] over a strided set of indices into
# its slot of work-group local memory, the group reduces its slots pairwise, and the
# first work-item of every group writes the group total to `partial`. The partial
# totals are summed after the launch completes.
#
# Launch shape: the local buffer has DotWGSize slots and is indexed by the local id,
# and `partial` is indexed by the group id. The launch therefore uses at most
# DotWGSize work-items per group and allocates exactly one `partial` slot per group,
# so neither index can run past its buffer and no unwritten slot is summed.
# NOTE(review): the pairwise reduction assumes the work-group size is a power of two — confirm.
function dot(data::oneData{T}, groupsize::Int) where {T}
  function kernel(a::AbstractArray{T}, b::AbstractArray{T}, size::Int, partial::AbstractArray{T})
    wg_sum = @LocalMemory(T, (DotWGSize,))
    li = get_local_id()
    # zero(T) rather than a Float64 literal, so Float32 runs stay in Float32
    @inbounds wg_sum[li] = zero(T)

    # do dot first
    i = get_global_id()
    while i <= size
      @inbounds wg_sum[li] += a[i] * b[i]
      i += get_global_size()
    end

    # then tree reduction
    offset = get_local_size() ÷ 2
    while offset > 0
      barrier()
      if li <= offset
        @inbounds wg_sum[li] += wg_sum[li+offset]
      end
      offset ÷= 2
    end

    if li == 1
      @inbounds partial[get_group_id()] = wg_sum[li]
    end

    return
  end

  wg_items = min(DotWGSize, groupsize) # never more work-items than local-memory slots
  num_groups = DotWGSize               # same number of groups as before
  partial_sum = oneArray{T}(undef, num_groups)
  @oneapi items = wg_items groups = num_groups kernel(
    data.a,
    data.b,
    data.size,
    partial_sum,
  )
  oneAPI.synchronize()
  return sum(partial_sum)
end
# Build a host-side VectorData{T} from the fields of `data`; the context argument is unused.
function read_data(data::oneData{T}, _)::VectorData{T} where {T}
  host_copy = VectorData{T}(data.a, data.b, data.c, data.scalar, data.size)
  return host_copy
end
main()

7
JuliaStream.jl/update_all.sh Executable file
View File

@ -0,0 +1,7 @@
#!/bin/bash
# shellcheck disable=SC2034 disable=SC2153
# Resolve, instantiate, update and garbage-collect the Julia packages of every
# project directory listed below, one after another.
BACKENDS=("." "AMDGPU" "CUDA" "oneAPI" "Threaded" "KernelAbstractions")
for BACKEND in "${BACKENDS[@]}"; do
  julia --project="$BACKEND" -e 'import Pkg; Pkg.resolve(); Pkg.instantiate(); Pkg.update(); Pkg.gc();'
done

View File

@ -23,6 +23,8 @@ Currently implemented are:
This code was previously called GPU-STREAM. This code was previously called GPU-STREAM.
This project also contains implementations in alternative languages with different build systems:
* Scala - [scala-stream](./scala-stream)
How is this different to STREAM? How is this different to STREAM?
-------------------------------- --------------------------------
@ -159,10 +161,12 @@ Citing
Please cite BabelStream via this reference: Please cite BabelStream via this reference:
> Deakin T, Price J, Martineau M, McIntosh-Smith S. GPU-STREAM v2.0: Benchmarking the achievable memory bandwidth of many-core processors across diverse parallel programming models. 2016. Paper presented at P^3MA Workshop at ISC High Performance, Frankfurt, Germany. > Deakin T, Price J, Martineau M, McIntosh-Smith S. GPU-STREAM v2.0: Benchmarking the achievable memory bandwidth of many-core processors across diverse parallel programming models. 2016. Paper presented at P^3MA Workshop at ISC High Performance, Frankfurt, Germany. DOI: 10.1007/978-3-319-46079-6_34
**Other BabelStream publications:** **Other BabelStream publications:**
> Deakin T, Price J, Martineau M, McIntosh-Smith S. Evaluating attainable memory bandwidth of parallel programming models via BabelStream. International Journal of Computational Science and Engineering. Special issue. Vol. 17, No. 3, pp. 247–262. 2018. DOI: 10.1504/IJCSE.2018.095847
> Deakin T, McIntosh-Smith S. GPU-STREAM: Benchmarking the achievable memory bandwidth of Graphics Processing Units. 2015. Poster session presented at IEEE/ACM SuperComputing, Austin, United States. > Deakin T, McIntosh-Smith S. GPU-STREAM: Benchmarking the achievable memory bandwidth of Graphics Processing Units. 2015. Poster session presented at IEEE/ACM SuperComputing, Austin, United States.
You can view the [Poster and Extended Abstract](http://sc15.supercomputing.org/sites/all/themes/SC15images/tech_poster/tech_poster_pages/post150.html). You can view the [Poster and Extended Abstract](http://sc15.supercomputing.org/sites/all/themes/SC15images/tech_poster/tech_poster_pages/post150.html).
@ -171,8 +175,6 @@ You can view the [Poster and Extended Abstract](http://sc16.supercomputing.org/s
> Raman K, Deakin T, Price J, McIntosh-Smith S. Improving achieved memory bandwidth from C++ codes on Intel Xeon Phi Processor (Knights Landing). IXPUG Spring Meeting, Cambridge, UK, 2017. > Raman K, Deakin T, Price J, McIntosh-Smith S. Improving achieved memory bandwidth from C++ codes on Intel Xeon Phi Processor (Knights Landing). IXPUG Spring Meeting, Cambridge, UK, 2017.
> Deakin T, Price J, Martineau M, McIntosh-Smith S. Evaluating attainable memory bandwidth of parallel programming models via BabelStream. International Journal of Computational Science and Engineering. Special issue (in press). 2017.
> Deakin T, Price J, McIntosh-Smith S. Portable methods for measuring cache hierarchy performance. 2017. Poster sessions presented at IEEE/ACM SuperComputing, Denver, United States. > Deakin T, Price J, McIntosh-Smith S. Portable methods for measuring cache hierarchy performance. 2017. Poster sessions presented at IEEE/ACM SuperComputing, Denver, United States.
You can view the [Poster and Extended Abstract](http://sc17.supercomputing.org/SC17%20Archive/tech_poster/tech_poster_pages/post155.html) You can view the [Poster and Extended Abstract](http://sc17.supercomputing.org/SC17%20Archive/tech_poster/tech_poster_pages/post155.html)

View File

@ -0,0 +1 @@
{"name":"sbt","version":"1.5.2","bspVersion":"2.0.0-M5","languages":["scala"],"argv":["/usr/lib/jvm/java-11-openjdk-11.0.11.0.9-2.fc33.x86_64/bin/java","-Xms100m","-Xmx100m","-classpath","/home/tom/.local/share/JetBrains/Toolbox/apps/IDEA-U/ch-0/211.7142.45.plugins/Scala/launcher/sbt-launch.jar","xsbt.boot.Boot","-bsp","--sbt-launch-jar=/home/tom/.local/share/JetBrains/Toolbox/apps/IDEA-U/ch-0/211.7142.45.plugins/Scala/launcher/sbt-launch.jar"]}

1
scala-stream/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
target/

2
scala-stream/.jvmopts Normal file
View File

@ -0,0 +1,2 @@
-Xmx4096m
-Xss4m

View File

@ -0,0 +1,34 @@
version = "3.0.0-RC2"
runner.dialect = scala3
style = defaultWithAlign
maxColumn = 100
align.preset = more
rewrite.rules = [
AvoidInfix
RedundantBraces
RedundantParens
AsciiSortImports
PreferCurlyFors
]
rewrite.neverInfix.excludeFilters = [until
to
by
eq
ne
"should.*"
"contain.*"
"must.*"
in
be
taggedAs
thrownBy
synchronized
have
when
size
theSameElementsAs]

102
scala-stream/README.md Normal file
View File

@ -0,0 +1,102 @@
ScalaStream
===========
This is an implementation of BabelStream
in [Scala 3](https://docs.scala-lang.org/scala3/new-in-scala3.html) on the JVM. In theory, this
implementation also covers Java. Scala and Java, like any other programming language, have their own
ecosystem of library-supported parallel programming frameworks. We currently implement the
following:
* Parallel streams (introduced in Java 8) - `src/main/scala/scalastream/J8SStream.scala`
* [Scala Parallel Collections](https://github.com/scala/scala-parallel-collections)
- `src/main/scala/scalastream/ParStream.scala`
As the benchmark is relatively simple, we also implement some baselines:
* Single threaded Scala `for` (i.e `foreach` sugar) - `src/main/scala/scalastream/PlainStream.scala`
* Manual parallelism with Java executors - `src/main/scala/scalastream/ThreadedStream.scala`
### Performance considerations
As Scala 3 defaults to Scala 2.13's standard library, we roll our own `Fractional` typeclass with
liberal use of inlining and specialisation. This is motivated by 2.13 stdlib's lack of
specialisation for primitive types on the default `Fractional` and `Numeric` typeclasses.
The use of [Spire](https://github.com/typelevel/spire) to mitigate this was attempted, however, due
to its use of Scala 2 macros, it currently doesn't compile with Scala 3.
### Build & Run
Prerequisites
* JDK >= 8 on any of its supported platforms; known working implementations:
- OpenJDK
distributions ([Amazon Corretto](https://docs.aws.amazon.com/corretto/latest/corretto-11-ug/downloads-list.html)
, [Azul](https://www.azul.com/downloads/?version=java-11-lts&package=jdk)
, [AdoptOpenJDK](https://adoptopenjdk.net/), etc)
- Oracle Graal CE/EE 8+
To run the benchmark, first create a binary:
```shell
> ./sbt assembly
```
The binary will be located at `./target/scala-3.0.0/scala-stream.jar`. Run it with:
```shell
> java -version
openjdk version "11.0.11" 2021-04-20
OpenJDK Runtime Environment 18.9 (build 11.0.11+9)
OpenJDK 64-Bit Server VM 18.9 (build 11.0.11+9, mixed mode, sharing)
> java -jar target/scala-3.0.0/scala-stream.jar --help
```
For best results, benchmark with the following JVM flags:
```
-XX:-UseOnStackReplacement # disable OSR, not useful for this benchmark as we are measuring peak performance
-XX:-TieredCompilation # disable C1, go straight to C2
-XX:ReservedCodeCacheSize=512m # don't flush compiled code out of cache at any point
```
Worked example:
```shell
> java -XX:-UseOnStackReplacement -XX:-TieredCompilation -XX:ReservedCodeCacheSize=512m -jar target/scala-3.0.0/scala-stream.jar
BabelStream
Version: 3.4.0
Implementation: Scala Parallel Collections; Scala (Java 11.0.11; Red Hat, Inc.; home=/usr/lib/jvm/java-11-openjdk-11.0.11.0.9-2.fc33.x86_64)
Running kernels 100 times
Precision: double
Array size: 268.4 MB (=0.3 GB)
Total size: 805.3 MB (=0.8 GB)
Function MBytes/sec Min (sec) Max Average
Copy 4087.077 0.13136 0.24896 0.15480
Mul 2934.709 0.18294 0.28706 0.21627
Add 3016.342 0.26698 0.39835 0.31119
Triad 3016.496 0.26697 0.37612 0.31040
Dot 2216.096 0.24226 0.41235 0.28264
```
### Graal Native Image
The port has partial support for Graal Native Image, to generate one, run:
```shell
> ./sbt nativeImage
```
The ELF binary will be located at `./target/native-image/scala-stream`, relocation should work on
the same architecture the binary is built on.
There's an ongoing bug with Scala 3's use of `lazy val`s where the program crashes at declaration
site. Currently, Scala Parallel Collections uses this feature internally, so selecting this device
will crash at runtime.
The bug originates from the use of `Unsafe` in `lazy val` for thread safety guarantees. It seems
that Graal only supports limited uses of this JVM implementation detail and Scala 3 happens to be on
the unsupported side.

29
scala-stream/build.sbt Normal file
View File

@ -0,0 +1,29 @@
lazy val mainCls = Some("scalastream.App")
// Single-project build rooted at the current directory.
// Produces the `scala-stream.jar` assembly and, through NativeImagePlugin, a native image;
// both use `mainCls` as the entry point.
lazy val root = (project in file("."))
.enablePlugins(NativeImagePlugin)
.settings(
scalaVersion := "3.0.0",
version := "3.4.0",
organization := "uk.ac.bristol.uob-hpc",
organizationName := "University of Bristol",
Compile / mainClass := mainCls,
assembly / mainClass := mainCls,
scalacOptions ~= filterConsoleScalacOptions,
assembly / assemblyJarName := "scala-stream.jar",
// native-image flags: no JVM fallback image, plus the reflection configuration file
nativeImageOptions := Seq(
"--no-fallback",
"-H:ReflectionConfigurationFiles=../../reflect-config.json"
),
nativeImageVersion := "21.1.0",
// exclude nativeImageVersion from sbt's lint of unused keys
(Global / excludeLintKeys) += nativeImageVersion,
name := "scala-stream",
libraryDependencies ++= Seq(
// Lazy val implementation in Scala 3 triggers an exception in nativeImage, use 2_13 for arg parsing for now otherwise we can't get to the benchmarking part
("com.github.scopt" %% "scopt" % "4.0.1").cross(CrossVersion.for3Use2_13),
// par also uses lazy val at some point, so it doesn't work in nativeImage
"org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.3",
"net.openhft" % "affinity" % "3.21ea1",
"org.slf4j" % "slf4j-simple" % "1.7.30" // for affinity
)
)

View File

@ -0,0 +1 @@
sbt.version=1.5.2

View File

@ -0,0 +1,6 @@
// sbt plugins used by the build, one per line.
addSbtPlugin("com.timushev.sbt" % "sbt-updates" % "0.5.3")
addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.1.17")
// provides NativeImagePlugin and the nativeImage* keys
addSbtPlugin("org.scalameta" % "sbt-native-image" % "0.3.0")
// provides the assembly task and assembly / ... keys
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.15.0")
addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.9.27")
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.2")

View File

@ -0,0 +1,11 @@
[
{
"name": "sun.misc.Unsafe",
"fields": [
{
"name": "theUnsafe",
"allowUnsafeAccess": true
}
]
}
]

3
scala-stream/sbt Executable file
View File

@ -0,0 +1,3 @@
#!/usr/bin/env bash
# Thin wrapper around the bundled sbt launcher; all arguments are passed through.
# The launcher is located relative to this script rather than the current
# directory, so the wrapper also works when it is invoked by path from elsewhere.
"$(dirname "${BASH_SOURCE[0]}")/sbt-dist/bin/sbt" "$@"

Binary file not shown.

177
scala-stream/sbt-dist/bin/sbt Executable file
View File

@ -0,0 +1,177 @@
#!/usr/bin/env bash
### ------------------------------- ###
### Helper methods for BASH scripts ###
### ------------------------------- ###
# Print the physical absolute path of a file, following symlinks.
#   $1  path to resolve
#   $2  when non-empty, pass the result through cygwinpath
# The whole body runs in a subshell, so the `cd` calls do not change the caller's
# working directory.
realpath () {
(
TARGET_FILE="$1"
FIX_CYGPATH="$2"
cd "$(dirname "$TARGET_FILE")"
TARGET_FILE=$(basename "$TARGET_FILE")
COUNT=0
# follow symlinks one hop at a time; give up after 100 hops
while [ -L "$TARGET_FILE" -a $COUNT -lt 100 ]
do
TARGET_FILE=$(readlink "$TARGET_FILE")
cd "$(dirname "$TARGET_FILE")"
TARGET_FILE=$(basename "$TARGET_FILE")
COUNT=$(($COUNT + 1))
done
# make sure we grab the actual windows path, instead of cygwin's path.
if [[ "x$FIX_CYGPATH" != "x" ]]; then
echo "$(cygwinpath "$(pwd -P)/$TARGET_FILE")"
else
echo "$(pwd -P)/$TARGET_FILE"
fi
)
}
# Uses uname to detect if we're in the odd cygwin environment.
is_cygwin() {
  # Succeeds (status 0) when `uname -s` starts with CYGWIN, MINGW or MSYS,
  # fails (status 1) otherwise.
  case "$(uname -s)" in
    CYGWIN*|MINGW*|MSYS*) return 0 ;;
  esac
  return 1
}
# TODO - Use nicer bash-isms here.
CYGWIN_FLAG=$(if is_cygwin; then echo true; else echo false; fi)
# This can fix cygwin style /cygdrive paths so we get the
# windows style paths.
cygwinpath() {
  # Print $1 converted to a Windows-style path when CYGWIN_FLAG is "true",
  # and unchanged otherwise.
  # All expansions are quoted so that paths containing spaces or glob characters
  # are passed through intact instead of being word-split or expanded.
  local file="$1"
  if [[ "$CYGWIN_FLAG" == "true" ]]; then
    printf '%s\n' "$(cygpath -w "$file")"
  else
    printf '%s\n' "$file"
  fi
}
. "$(dirname "$(realpath "$0")")/sbt-launch-lib.bash"
declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy"
declare -r sbt_opts_file=".sbtopts"
declare -r etc_sbt_opts_file="/etc/sbt/sbtopts"
declare -r dist_sbt_opts_file="${sbt_home}/conf/sbtopts"
declare -r win_sbt_opts_file="${sbt_home}/conf/sbtconfig.txt"
# Print the command-line help to stdout.
# The heredoc is unquoted, so the variables and command substitutions inside it
# (basename, get_mem_opts, java -version, ...) are evaluated when usage is called.
usage() {
cat <<EOM
Usage: `basename "$0"` [options]
-h | -help print this message
-v | -verbose this runner is chattier
-d | -debug set sbt log level to debug
-no-colors disable ANSI color codes
-sbt-create start sbt even if current directory contains no sbt project
-sbt-dir <path> path to global settings/plugins directory (default: ~/.sbt)
-sbt-boot <path> path to shared boot directory (default: ~/.sbt/boot in 0.11 series)
-ivy <path> path to local Ivy repository (default: ~/.ivy2)
-mem <integer> set memory options (default: $sbt_default_mem, which is $(get_mem_opts))
-no-share use all local caches; no sharing
-no-global uses global caches, but does not use global ~/.sbt directory.
-jvm-debug <port> Turn on JVM debugging, open at the given port.
-batch Disable interactive mode
# sbt version (default: from project/build.properties if present, else latest release)
-sbt-version <version> use the specified version of sbt
-sbt-jar <path> use the specified jar as the sbt launcher
-sbt-rc use an RC version of sbt
-sbt-snapshot use a snapshot version of sbt
# java version (default: java from PATH, currently $(java -version 2>&1 | grep version))
-java-home <path> alternate JAVA_HOME
# jvm options and output control
JAVA_OPTS environment variable, if unset uses "$java_opts"
.jvmopts if this file exists in the current directory, its contents
are appended to JAVA_OPTS
SBT_OPTS environment variable, if unset uses "$default_sbt_opts"
.sbtopts if this file exists in the current directory, its contents
are prepended to the runner args
/etc/sbt/sbtopts if this file exists, it is prepended to the runner args
-Dkey=val pass -Dkey=val directly to the java runtime
-J-X pass option -X directly to the java runtime
(-J is stripped)
-S-X add -X to sbt's scalacOptions (-S is stripped)
In the case of duplicated or conflicting options, the order above
shows precedence: JAVA_OPTS lowest, command line options highest.
EOM
}
# Handle the options specific to this runner script.
# Recognised options are turned into JVM arguments (addJava) or flags; everything
# else is kept as a residual argument (addResidual). Afterwards the sbt version is
# forwarded if set, and the user is asked to confirm when the current directory
# does not look like an sbt project.
process_my_args () {
while [[ $# -gt 0 ]]; do
case "$1" in
-no-colors) addJava "-Dsbt.log.noformat=true" && shift ;;
-no-share) addJava "$noshare_opts" && shift ;;
-no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;;
-sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;;
-sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;;
-debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;;
-batch) exec </dev/null && shift ;;
-sbt-create) sbt_create=true && shift ;;
new) sbt_new=true && addResidual "$1" && shift ;;
*) addResidual "$1" && shift ;;
esac
done
# Now, ensure sbt version is used.
[[ "${sbt_version}XXX" != "XXX" ]] && addJava "-Dsbt.version=$sbt_version"
# Confirm a user's intent if the current directory does not look like an sbt
# top-level directory and neither the -sbt-create option nor the "new"
# command was given.
[[ -f ./build.sbt || -d ./project || -n "$sbt_create" || -n "$sbt_new" ]] || {
echo "[warn] Neither build.sbt nor a 'project' directory in the current directory: $(pwd)"
# keep prompting until the answer is c/C (continue) or q/Q (quit); EOF on stdin quits
while true; do
echo 'c) continue'
echo 'q) quit'
read -p '? ' || exit 1
case "$REPLY" in
c|C) break ;;
q|Q) exit 1 ;;
esac
done
}
}
# Print the option lines of config file $1, one per line.
# Lines starting with '#' are dropped and a trailing carriage return is removed.
# NOTE(review): each remaining line goes through `eval echo`, so shell expansions
# (variables, command substitution) in the file are executed — only use this on
# trusted config files.
loadConfigFile() {
# Make sure the last line is read even if it doesn't have a terminating \n
cat "$1" | sed $'/^\#/d;s/\r$//' | while read -r line || [[ -n "$line" ]]; do
eval echo $line
done
}
# Here we pull in the default settings configuration.
[[ -f "$dist_sbt_opts_file" ]] && set -- $(loadConfigFile "$dist_sbt_opts_file") "$@"
# Here we pull in the global settings configuration.
[[ -f "$etc_sbt_opts_file" ]] && set -- $(loadConfigFile "$etc_sbt_opts_file") "$@"
# Pull in the project-level config file, if it exists.
[[ -f "$sbt_opts_file" ]] && set -- $(loadConfigFile "$sbt_opts_file") "$@"
# Pull in the project-level java config, if it exists.
[[ -f ".jvmopts" ]] && export JAVA_OPTS="$JAVA_OPTS $(loadConfigFile .jvmopts)"
run "$@"

View File

@ -0,0 +1,363 @@
#!/usr/bin/env bash
#
# A library to simplify using the SBT launcher from other packages.
# Note: This should be used by tools like giter8/conscript etc.
# TODO - Should we merge the main SBT script with this library?
# Argument buckets filled by addResidual / addJava / addSbt.
# NOTE(review): scalac_args is declared but not used in the visible part of this library.
declare -a residual_args
declare -a java_args
declare -a scalac_args
declare -a sbt_commands
# Java executable to run; replaced by -java-home in process_args.
declare java_cmd=java
declare java_version
# sbt version looked for in the preloaded repository (see syncPreloaded).
declare init_sbt_version="1.2.8"
# Default heap size in MB used by get_mem_opts when -mem is not given.
declare sbt_default_mem=1024
# Resolve $0 through any chain of symbolic links so that sbt_bin_dir and
# sbt_home point at the real installation directory.
declare SCRIPT=$0
while [ -h "$SCRIPT" ] ; do
ls=$(ls -ld "$SCRIPT")
# Drop everything prior to ->
link=$(expr "$ls" : '.*-> \(.*\)$')
# An absolute link target replaces SCRIPT; a relative one is resolved
# against the directory that contains the link.
if expr "$link" : '/.*' > /dev/null; then
SCRIPT="$link"
else
SCRIPT=$(dirname "$SCRIPT")/"$link"
fi
done
declare -r sbt_bin_dir="$(dirname "$SCRIPT")"
declare -r sbt_home="$(dirname "$sbt_bin_dir")"
# Writes all arguments to standard error, exactly as `echo` would print them.
echoerr () {
  echo "$@" >&2
}
# Logs the arguments to stderr when $verbose or $debug is non-empty.
# Returns non-zero when nothing is logged (callers must tolerate that).
vlog () {
  [[ -n $verbose || -n $debug ]] && echoerr "$@"
}
# Logs the arguments to stderr when $debug is non-empty.
# Returns non-zero when nothing is logged (callers must tolerate that).
dlog () {
  [[ -n $debug ]] && echoerr "$@"
}
# Prints the path of the bundled launcher jar, converted by cygwinpath.
jar_file () {
  local launcher="$(cygwinpath "${sbt_home}/bin/sbt-launch.jar")"
  echo "$launcher"
}
# Sets $sbt_jar to the bundled launcher jar and exits with status 2 when that
# file does not exist.
acquire_sbt_jar () {
  sbt_jar="$(jar_file)"
  if [[ -f "$sbt_jar" ]]; then
    return 0
  fi
  echoerr "Could not find launcher jar: $sbt_jar"
  exit 2
}
# Prints the path of java9-rt-export.jar, located next to this script.
rt_export_file () {
  printf '%s\n' "${sbt_bin_dir}/java9-rt-export.jar"
}
# Runs the command given as arguments and returns its exit status.
# In verbose or debug mode the command line is first printed to stdout, one
# argument per line, with arguments that contain a space wrapped in quotes.
execRunner () {
  if [[ $verbose || $debug ]] && echo "# Executing command line:"; then
    for arg; do
      case "$arg" in
        *" "*) printf "\"%s\"\n" "$arg" ;;
        *)     printf "%s\n" "$arg" ;;
      esac
    done
    echo ""
  fi

  # This used to be exec, but then we lose the ability to re-hook stty
  # for cygwin... Maybe we should flag the feature here...
  "$@"
}
# Appends "$1" to the JVM argument list (java_args).
addJava () {
  dlog "[addJava] arg = '$1'"
  java_args+=( "$1" )
}
# Appends "$1" to the list of sbt commands (sbt_commands).
addSbt () {
  dlog "[addSbt] arg = '$1'"
  sbt_commands+=( "$1" )
}
# Appends "$1" to the list of arguments nobody has consumed yet (residual_args).
addResidual () {
  dlog "[residual] arg = '$1'"
  residual_args+=( "$1" )
}
# Adds a JDWP agent option listening on the address/port "$1"
# (socket transport, server mode, JVM not suspended at start-up).
addDebugger () {
  local jdwp_opts="transport=dt_socket,server=y,suspend=n,address=$1"
  addJava "-agentlib:jdwp=${jdwp_opts}"
}
# Prints the default JVM memory flags (-Xms, -Xmx, -XX:ReservedCodeCacheSize
# and the class-metadata limit), all derived from one heap size.
#
#   $1 - heap size in MB; defaults to $sbt_default_mem.
#
# Prints an empty line when JAVA_OPTS, JAVA_TOOL_OPTIONS or SBT_OPTS already
# contains any memory flag: if Xms/Xmx don't line up the JVM reports errors,
# so the user's settings are left alone. Flags that already appear in
# java_args are omitted individually.
get_mem_opts () {
  local user_opts flag
  for user_opts in "${JAVA_OPTS}" "${JAVA_TOOL_OPTIONS}" "${SBT_OPTS}"; do
    for flag in -Xmx -Xms -XX:MaxPermSize -XX:MaxMetaspaceSize -XX:ReservedCodeCacheSize; do
      if [[ "$user_opts" == *"$flag"* ]]; then
        echo ""
        return
      fi
    done
  done

  # a ham-fisted attempt to move some memory settings in concert
  # so they need not be messed around with individually.
  local mem=${1:-$sbt_default_mem}
  local codecache=$(( $mem / 8 ))
  # Clamp the code cache to the range [128, 512] MB.
  (( $codecache > 128 )) || codecache=128
  (( $codecache < 512 )) || codecache=512
  local class_metadata_size=$(( $codecache * 2 ))

  if [[ -z $java_version ]]; then
    java_version=$(jdk_version)
  fi

  # The comparison is done once, arithmetically. The previous code repeated it
  # inside [[ ]], where "(( a < b ))" is plain grouping around a *string*
  # comparison, so "11" sorted before "8" and an explicit
  # -XX:MaxMetaspaceSize in java_args was ignored on Java 10 and later.
  local pre_java8=0
  local class_metadata_opt="MaxMetaspaceSize"
  if (( $java_version < 8 )); then
    pre_java8=1
    class_metadata_opt="MaxPermSize"
  fi

  local arg_xms="-Xms${mem}m"
  local arg_xmx="-Xmx${mem}m"
  local arg_rccs="-XX:ReservedCodeCacheSize=${codecache}m"
  local arg_meta="-XX:${class_metadata_opt}=${class_metadata_size}m"

  # Anything the user already passed on the command line wins.
  if [[ "${java_args[@]}" == *-Xms* ]]; then arg_xms=""; fi
  if [[ "${java_args[@]}" == *-Xmx* ]]; then arg_xmx=""; fi
  if [[ "${java_args[@]}" == *-XX:ReservedCodeCacheSize* ]]; then arg_rccs=""; fi
  if [[ "${java_args[@]}" == *-XX:${class_metadata_opt}* ]] && (( ! pre_java8 )); then
    arg_meta=""
  fi

  echo "${arg_xms} ${arg_xmx} ${arg_rccs} ${arg_meta}"
}
# Prints the garbage-collector flag to add to the JVM command line.
# Nothing (an empty line) is printed for Java older than 9, or when JAVA_OPTS,
# JAVA_TOOL_OPTIONS or SBT_OPTS already matches "Use.*GC"; otherwise the
# parallel collector is selected.
get_gc_opts () {
  local older_than_9=$(( $java_version < 9 ))
  local gc_flag="-XX:+UseParallelGC"

  if [[ "$older_than_9" == "1" ]]; then
    # don't need to worry about gc
    gc_flag=""
  else
    local user_opts
    for user_opts in "${JAVA_OPTS}" "${JAVA_TOOL_OPTIONS}" "${SBT_OPTS}"; do
      if [[ "$user_opts" =~ Use.*GC ]]; then
        # GC arg has been passed in - don't change
        gc_flag=""
        break
      fi
    done
  fi

  echo "$gc_flag"
}
# Verifies that option "$2" was followed by a value.
#   $1 - human-readable kind of the value (used in the message)
#   $2 - the option name
#   $3 - the candidate value
# Prints a message and exits with status 1 when the value is empty or starts
# with '-' (i.e. looks like the next option).
require_arg () {
  local type="$1" opt="$2" arg="$3"

  case "$arg" in
    ""|-*)
      echo "$opt requires <$type> argument"
      exit 1
      ;;
  esac
}
# Succeeds when a shell function named "$1" exists; prints nothing.
is_function_defined() {
  declare -F "$1" > /dev/null
}
# parses JDK version from the -version output line.
# 8 for 1.8.0_nn, 9 for 9-ea etc, and "no_java" for undetected
#
# Runs "$java_cmd -version" (with a tiny heap so the probe itself is cheap)
# and prints the major version on stdout. Note that the "undetected" result is
# the literal string no_java, not an empty string.
jdk_version() {
local result
# Carriage returns are turned into newlines so CRLF output splits cleanly.
local lines=$("$java_cmd" -Xms32M -Xmx32M -version 2>&1 | tr '\r' '\n')
# Split the unquoted $lines below on newlines only.
local IFS=$'\n'
for line in $lines; do
# Only the first line containing: version "..." is used.
if [[ (-z $result) && ($line = *"version \""*) ]]
then
local ver=$(echo $line | sed -e 's/.*version "\(.*\)"\(.*\)/\1/; 1q')
# on macOS sed doesn't support '?'
if [[ $ver = "1."* ]]
then
# Legacy scheme 1.x.y: the major version is x.
result=$(echo $ver | sed -e 's/1\.\([0-9]*\)\(.*\)/\1/; 1q')
else
# Modern scheme: the leading digits are the major version.
result=$(echo $ver | sed -e 's/\([0-9]*\)\(.*\)/\1/; 1q')
fi
fi
done
if [[ -z $result ]]
then
result=no_java
fi
echo "$result"
}
# Parses the launcher options common to every tool built on this library.
# Recognised options update global state (verbose, debug, sbt_mem, sbt_jar,
# sbt_version, java_cmd, JAVA_HOME, ...) or are forwarded with addJava/addSbt;
# everything else is collected with addResidual. If the including script
# defines process_my_args, the residuals are handed to it for a second pass.
# Finally java_version is detected with the (possibly overridden) java_cmd.
process_args () {
while [[ $# -gt 0 ]]; do
case "$1" in
-h|-help) usage; exit 1 ;;
-v|-verbose) verbose=1 && shift ;;
-d|-debug) debug=1 && addSbt "-debug" && shift ;;
-ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;;
-mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;;
-jvm-debug) require_arg port "$1" "$2" && addDebugger $2 && shift 2 ;;
-batch) exec </dev/null && shift ;;
-sbt-jar) require_arg path "$1" "$2" && sbt_jar="$2" && shift 2 ;;
-sbt-version) require_arg version "$1" "$2" && sbt_version="$2" && shift 2 ;;
-java-home) require_arg path "$1" "$2" &&
java_cmd="$2/bin/java" &&
export JAVA_HOME="$2" &&
export JDK_HOME="$2" &&
export PATH="$2/bin:$PATH" &&
shift 2 ;;
# NOTE(review): because the pattern is quoted it only matches the literal
# string "-D*"; real -Dkey=value arguments fall through to the residual
# branch below. Confirm whether that is intended before changing it.
"-D*") addJava "$1" && shift ;;
# -J<flag> passes <flag> to the JVM with the "-J" prefix removed.
-J*) addJava "${1:2}" && shift ;;
*) addResidual "$1" && shift ;;
esac
done
is_function_defined process_my_args && {
# Work on a copy so process_my_args can rebuild residual_args from scratch.
myargs=("${residual_args[@]}")
residual_args=()
process_my_args "${myargs[@]}"
}
java_version="$(jdk_version)"
vlog "[process_args] java_version = '$java_version'"
}
# Extracts the preloaded directory from either -Dsbt.preloaded or -Dsbt.global.base
# properties by looking at:
# - _JAVA_OPTIONS environment variable,
# - SBT_OPTS environment variable,
# - JAVA_OPTS environment variable and
# - properties set by command-line options
# in that order. The last one will be chosen such that `sbt.preloaded` is
# always preferred over `sbt.global.base`.
# Prints the preloaded-repository directory.
# Candidates are scanned in the order _JAVA_OPTIONS, SBT_OPTS, JAVA_OPTS,
# java_args; within each kind of property the last occurrence wins, and an
# explicit -Dsbt.preloaded always beats the value derived from
# -Dsbt.global.base. Without either property the result is
# $HOME/.sbt/preloaded.
getPreloaded() {
  local -a args_to_check=()
  local -a words
  local env_value

  # Split each environment variable into words, keeping the scan order.
  for env_value in "$_JAVA_OPTIONS" "$SBT_OPTS" "$JAVA_OPTS"; do
    read -a words <<< "$env_value"
    args_to_check+=( "${words[@]}" )
  done
  args_to_check+=( "${java_args[@]}" )

  local via_global_base="$HOME/.sbt/preloaded"
  local via_explicit=""

  for opt in "${args_to_check[@]}"; do
    case "$opt" in
      -Dsbt.preloaded=*)   via_explicit="${opt#-Dsbt.preloaded=}" ;;
      -Dsbt.global.base=*) via_global_base="${opt#-Dsbt.global.base=}/preloaded" ;;
    esac
  done

  echo "${via_explicit:-${via_global_base}}"
}
# Copies the local-preloaded repository bundled under $sbt_home/lib into the
# user's preloaded directory (see getPreloaded), unless the sbt jar for
# $init_sbt_version is already present there. The copy is skipped silently
# when the bundled directory or rsync is missing.
# The function's return status is that of the last test/command in the chain,
# so it can be non-zero even when nothing went wrong.
syncPreloaded() {
local source_preloaded="$sbt_home/lib/local-preloaded/"
local target_preloaded="$(getPreloaded)"
if [[ "$init_sbt_version" == "" ]]; then
# FIXME: better $init_sbt_version detection
init_sbt_version="$(ls -1 "$source_preloaded/org.scala-sbt/sbt/")"
fi
[[ -f "$target_preloaded/org.scala-sbt/sbt/$init_sbt_version/jars/sbt.jar" ]] || {
# lib/local-preloaded exists (This is optional)
[[ -d "$source_preloaded" ]] && {
command -v rsync >/dev/null 2>&1 && {
mkdir -p "$target_preloaded"
# --ignore-existing: never overwrite files the user already has.
rsync -a --ignore-existing "$source_preloaded" "$target_preloaded"
}
}
}
}
# Detect that we have java installed.
#
# Verifies that $java_version (as produced by jdk_version) is at least the
# required major version.
#   $1 - minimum major Java version
# Prints an explanation and exits with status 1 when no JDK was detected or
# the detected one is too old.
checkJava() {
  local required_version="$1"

  # jdk_version reports an undetected JDK as "no_java" (never as an empty
  # string). Anything non-numeric is treated as "not detected" *before*
  # comparing: expr would otherwise compare strings, and "no_java" sorts
  # after "6", which used to let a missing JDK pass this check.
  if [[ ! "$java_version" =~ ^[0-9]+$ ]]; then
    echo
    echo "No Java Development Kit (JDK) installation was detected."
    echo Please go to http://www.oracle.com/technetwork/java/javase/downloads/ and download.
    echo
    exit 1
  fi

  # Now check to see if it's a good enough version
  if (( java_version < required_version )); then
    echo
    echo "The Java Development Kit (JDK) installation you have is not up to date."
    echo $script_name requires at least version $required_version+, you have
    echo version $java_version
    echo
    echo Please go to http://www.oracle.com/technetwork/java/javase/downloads/ and download
    echo a valid JDK and install before running $script_name.
    echo
    exit 1
  fi
}
# Java 9+ support: asks java9-rt-export.jar for the extension directory,
# exports rt.jar into it when the file is missing, and points scala.ext.dirs
# at that directory. Does nothing for Java older than 9.
copyRt() {
local at_least_9="$(expr $java_version ">=" 9)"
if [[ "$at_least_9" == "1" ]]; then
rtexport=$(rt_export_file)
# The grep for java9-rt-ext- matches the filename prefix printed in Export.java
# JAVA_OPTS, SBT_OPTS and java_args are left unquoted on purpose so that
# each contained option becomes a separate JVM argument.
java9_ext=$("$java_cmd" ${JAVA_OPTS} ${SBT_OPTS:-$default_sbt_opts} ${java_args[@]} \
-jar "$rtexport" --rt-ext-dir | grep java9-rt-ext-)
java9_rt=$(echo "$java9_ext/rt.jar")
vlog "[copyRt] java9_rt = '$java9_rt'"
if [[ ! -f "$java9_rt" ]]; then
echo Copying runtime jar.
mkdir -p "$java9_ext"
execRunner "$java_cmd" \
${JAVA_OPTS} \
${SBT_OPTS:-$default_sbt_opts} \
${java_args[@]} \
-jar "$rtexport" \
"${java9_rt}"
fi
addJava "-Dscala.ext.dirs=${java9_ext}"
fi
}
# Entry point of the library: parses the arguments, prepares the environment
# (preloaded repository, launcher jar, Java check, Java 9 runtime jar, cygwin
# terminal settings), starts the JVM with the sbt launcher and finally exits
# the shell with the JVM's exit code.
run() {
# process the combined args, then reset "$@" to the residuals
process_args "$@"
set -- "${residual_args[@]}"
argumentCount=$#
# Copy preloaded repo to user's preloaded directory
syncPreloaded
# no jar? download it.
[[ -f "$sbt_jar" ]] || acquire_sbt_jar "$sbt_version" || {
# still no jar? uh-oh.
echo "Download failed. Obtain the sbt-launch.jar manually and place it at $sbt_jar"
exit 1
}
# TODO - java check should be configurable...
checkJava "6"
# Java 9 support
copyRt
#If we're in cygwin, we should use the windows config, and terminal hacks
if [[ "$CYGWIN_FLAG" == "true" ]]; then
stty -icanon min 1 -echo > /dev/null 2>&1
addJava "-Djline.terminal=jline.UnixTerminal"
addJava "-Dsbt.cygwin=true"
fi
# run sbt
# Order matters: the computed memory/GC defaults come first and the options
# from JAVA_OPTS, SBT_OPTS and java_args follow them on the command line.
# The option variables are unquoted so each option is its own argument.
execRunner "$java_cmd" \
$(get_mem_opts $sbt_mem) \
$(get_gc_opts) \
${JAVA_OPTS} \
${SBT_OPTS:-$default_sbt_opts} \
${java_args[@]} \
-jar "$sbt_jar" \
"${sbt_commands[@]}" \
"${residual_args[@]}"
# Saved immediately: the stty call below would overwrite $?.
exit_code=$?
# Clean up the terminal from cygwin hacks.
if [[ "$CYGWIN_FLAG" == "true" ]]; then
stty icanon echo > /dev/null 2>&1
fi
exit $exit_code
}

Binary file not shown.

View File

@ -0,0 +1,212 @@
@REM SBT launcher script
@REM
@REM Environment:
@REM JAVA_HOME - location of a JDK home dir (mandatory)
@REM SBT_OPTS - JVM options (optional)
@REM Configuration:
@REM sbtconfig.txt found in the SBT_HOME.
@REM ZOMG! We need delayed expansion to build up CFG_OPTS later
@setlocal enabledelayedexpansion
@echo off
rem SBT_HOME is the directory that contains this script (with trailing backslash).
set SBT_HOME=%~dp0
set SBT_ARGS=
rem FIRST we load the config file of extra options.
set FN=%SBT_HOME%\..\conf\sbtconfig.txt
set CFG_OPTS=
rem Every line of the file that does not start with # is appended to CFG_OPTS.
FOR /F "tokens=* eol=# usebackq delims=" %%i IN ("%FN%") DO (
set DO_NOT_REUSE_ME=%%i
rem ZOMG (Part #2) WE use !! here to delay the expansion of
rem CFG_OPTS, otherwise it remains "" for this loop.
set CFG_OPTS=!CFG_OPTS! !DO_NOT_REUSE_ME!
)
rem poor man's jenv (which is not available on Windows)
rem The content of .java-version selects a sub-directory of JAVA_HOMES.
IF DEFINED JAVA_HOMES (
IF EXIST .java-version FOR /F %%A IN (.java-version) DO (
SET JAVA_HOME=%JAVA_HOMES%\%%A
SET JDK_HOME=%JAVA_HOMES%\%%A
)
)
rem must set PATH or wrong javac is used for java projects
IF DEFINED JAVA_HOME SET "PATH=%JAVA_HOME%\bin;%PATH%"
rem users can set JAVA_OPTS via .jvmopts (sbt-extras style)
rem Lines starting with # are skipped; each other line is put in front of JAVA_OPTS.
IF EXIST .jvmopts FOR /F %%A IN (.jvmopts) DO (
SET _jvmopts_line=%%A
IF NOT "!_jvmopts_line:~0,1!"=="#" (
SET JAVA_OPTS=%%A !JAVA_OPTS!
)
)
rem We use the value of the JAVACMD environment variable if defined
rem Fallbacks: %JAVA_HOME%\bin\java.exe when it exists, then plain "java" from PATH.
set _JAVACMD=%JAVACMD%
if "%_JAVACMD%"=="" (
if not "%JAVA_HOME%"=="" (
if exist "%JAVA_HOME%\bin\java.exe" set "_JAVACMD=%JAVA_HOME%\bin\java.exe"
)
)
if "%_JAVACMD%"=="" set _JAVACMD=java
rem We use the value of the JAVA_OPTS environment variable if defined, rather than the config.
set _JAVA_OPTS=%JAVA_OPTS%
if "%_JAVA_OPTS%"=="" set _JAVA_OPTS=%CFG_OPTS%
set INIT_SBT_VERSION=1.2.8
rem Walks over the command-line arguments and collects them in SBT_ARGS.
rem   -jvm-debug  enables debugging on port 5005; the argument that follows
rem               replaces the port when it is a plain number, otherwise it is
rem               kept as a normal sbt argument.
rem   new         is remembered in sbt_new (skips the project check below).
:args_loop
if "%~1" == "" goto args_end
if "%~1" == "-jvm-debug" (
set JVM_DEBUG=true
set /a JVM_DEBUG_PORT=5005 2>nul >nul
) else if "!JVM_DEBUG!" == "true" (
rem NOTE(review): JVM_DEBUG is never reset, so this branch is taken for every
rem argument after -jvm-debug, not only the next one - confirm this is intended.
set /a JVM_DEBUG_PORT=%1 2>nul >nul
if not "%~1" == "!JVM_DEBUG_PORT!" (
set SBT_ARGS=!SBT_ARGS! %1
)
) else if /I "%~1" == "new" (
set sbt_new=true
set SBT_ARGS=!SBT_ARGS! %1
) else (
set SBT_ARGS=!SBT_ARGS! %1
)
shift
goto args_loop
:args_end
rem Confirm a user's intent if the current directory does not look like an sbt
rem top-level directory and the "new" command was not given.
rem The prompt repeats until the answer is c (continue) or q (quit, exit code 1);
rem the comparison is case-insensitive.
if not exist build.sbt (
if not exist project\ (
if not defined sbt_new (
echo [warn] Neither build.sbt nor a 'project' directory in the current directory: %CD%
setlocal
:confirm
echo c^) continue
echo q^) quit
set /P reply=?^
if /I "!reply!" == "c" (
goto confirm_end
) else if /I "!reply!" == "q" (
exit /B 1
)
goto confirm
:confirm_end
endlocal
)
)
)
rem Main sequence: detect the Java version, verify it, prepare the JVM options
rem and the preloaded repository, then launch sbt.
call :process
call :checkjava
rem :checkjava returns 1 after printing its message when the JDK is too old
rem (or was not detected). Stop here instead of launching sbt regardless.
if ERRORLEVEL 1 goto error
call :copyrt
if defined JVM_DEBUG_PORT (
  set _JAVA_OPTS=!_JAVA_OPTS! -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=!JVM_DEBUG_PORT!
)
call :sync_preloaded
call :run %SBT_ARGS%
if ERRORLEVEL 1 goto error
goto end
rem Starts the sbt launcher (main class xsbt.boot.Boot from sbt-launch.jar next
rem to this script) with the collected JVM options; %* are the sbt arguments.
:run
"%_JAVACMD%" %_JAVA_OPTS% %SBT_OPTS% -cp "%SBT_HOME%sbt-launch.jar" xsbt.boot.Boot %*
goto :eof
rem Stores the major Java version in JAVA_VERSION (0 when it cannot be read).
:process
rem Parses x out of 1.x; for example 8 out of java version 1.8.0_xx
rem Otherwise, parses the major version; 9 out of java version 9-ea
set JAVA_VERSION=0
rem The third whitespace-separated token of the "version" line is the quoted version string.
for /f "tokens=3" %%g in ('"%_JAVACMD%" -Xms32M -Xmx32M -version 2^>^&1 ^| findstr /i version') do (
set JAVA_VERSION=%%g
)
rem Strip the surrounding double quotes.
set JAVA_VERSION=%JAVA_VERSION:"=%
rem Split on . - _ and keep the second field for 1.x versions, the first otherwise.
for /f "delims=.-_ tokens=1-2" %%v in ("%JAVA_VERSION%") do (
if /I "%%v" EQU "1" (
set JAVA_VERSION=%%w
) else (
set JAVA_VERSION=%%v
)
)
exit /B 0
rem Returns 0 when JAVA_VERSION is at least required_version; otherwise prints
rem an explanation and returns 1.
:checkjava
set required_version=6
if /I %JAVA_VERSION% GEQ %required_version% (
exit /B 0
)
echo.
echo The Java Development Kit (JDK) installation you have is not up to date.
echo sbt requires at least version %required_version%+, you have
echo version %JAVA_VERSION%
echo.
echo Please go to http://www.oracle.com/technetwork/java/javase/downloads/ and download
echo a valid JDK and install before running sbt.
echo.
exit /B 1
rem Java 9+ only: asks java9-rt-export.jar for the extension directory, exports
rem rt.jar into it when missing, adds -Dscala.ext.dirs and, unless a GC option
rem is already present, selects the parallel collector.
:copyrt
if /I %JAVA_VERSION% GEQ 9 (
set rtexport=!SBT_HOME!java9-rt-export.jar
rem The directory name is passed through a temporary file and read back with set /p.
"%_JAVACMD%" %_JAVA_OPTS% %SBT_OPTS% -jar "!rtexport!" --rt-ext-dir > "%TEMP%.\rtext.txt"
set /p java9_ext= < "%TEMP%.\rtext.txt"
set java9_rt=!java9_ext!\rt.jar
if not exist "!java9_rt!" (
mkdir "!java9_ext!"
"%_JAVACMD%" %_JAVA_OPTS% %SBT_OPTS% -jar "!rtexport!" "!java9_rt!"
)
set _JAVA_OPTS=!_JAVA_OPTS! -Dscala.ext.dirs="!java9_ext!"
rem check to see if a GC has been set in the opts
echo !_JAVA_OPTS! | findstr /r "Use.*GC" >nul
if ERRORLEVEL 1 (
rem don't have a GC set - revert to old GC
set _JAVA_OPTS=!_JAVA_OPTS! -XX:+UseParallelGC
)
)
exit /B 0
rem Copies the bundled local-preloaded repository into
rem %UserProfile%\.sbt\preloaded unless the sbt jar for INIT_SBT_VERSION is
rem already there. Only done for Java 8+ and when robocopy is available.
:sync_preloaded
if "%INIT_SBT_VERSION%"=="" (
  rem FIXME: better %INIT_SBT_VERSION% detection
  FOR /F "tokens=* USEBACKQ" %%F IN (`dir /b "%SBT_HOME%\..\lib\local-preloaded\org.scala-sbt\sbt"`) DO (
    SET INIT_SBT_VERSION=%%F
  )
)
set PRELOAD_SBT_JAR="%UserProfile%\.sbt\preloaded\org.scala-sbt\sbt\%INIT_SBT_VERSION%\jars\sbt.jar"
if /I %JAVA_VERSION% GEQ 8 (
  where robocopy >nul 2>nul
  rem "if not ERRORLEVEL 1" is evaluated when the line runs. %%ERRORLEVEL%% would
  rem be expanded when the whole parenthesised block is parsed, i.e. before
  rem "where" has run, and so could never reflect its result.
  if not ERRORLEVEL 1 (
    REM echo %PRELOAD_SBT_JAR%
    if not exist %PRELOAD_SBT_JAR% (
      if exist "%SBT_HOME%\..\lib\local-preloaded\" (
        echo "about to robocopy"
        robocopy "%SBT_HOME%\..\lib\local-preloaded" "%UserProfile%\.sbt\preloaded" /E
      )
    )
  )
)
exit /B 0
rem Exit points of the script: both close the setlocal scope opened at the top;
rem :error reports failure (1) and :end success (0) to the caller.
:error
@endlocal
exit /B 1
:end
@endlocal
exit /B 0

View File

@ -0,0 +1,14 @@
# Raise the JVM memory limits
-Xmx512M
-XX:MaxPermSize=256m
-XX:ReservedCodeCacheSize=128m
# Set the extra SBT options
-Dsbt.log.format=true

View File

@ -0,0 +1,49 @@
# ------------------------------------------------ #
# The SBT Configuration file. #
# ------------------------------------------------ #
# Disable ANSI color codes
#
#-no-colors
# Starts sbt even if the current directory contains no sbt project.
#
-sbt-create
# Path to global settings/plugins directory (default: ~/.sbt)
#
#-sbt-dir /etc/sbt
# Path to shared boot directory (default: ~/.sbt/boot in 0.11 series)
#
#-sbt-boot ~/.sbt/boot
# Path to local Ivy repository (default: ~/.ivy2)
#
#-ivy ~/.ivy2
# set memory options
#
#-mem <integer>
# Use local caches for projects, no sharing.
#
#-no-share
# Put SBT in offline mode.
#
#-offline
# Sets the SBT version to use.
#-sbt-version 0.11.3
# Scala version (default: latest release)
#
#-scala-home <path>
#-scala-version <version>
# java version (default: java from PATH, currently $(java -version |& grep version))
#
#-java-home <path>

View File

@ -0,0 +1,44 @@
package scalastream
import scalastream.App.{Config, Data}
import scala.collection.immutable.ArraySeq
import scala.reflect.{ClassTag, classTag}
/** BabelStream kernels implemented with a parallel `java.util.stream.IntStream`
  * ranging over the element indices.
  *
  * @param config benchmark configuration: array size, scalar and initial values
  * @tparam A element type, requires `Fractional` arithmetic and a `ClassTag`
  */
class J8SStream[@specialized(Float, Double) A: Fractional: ClassTag](val config: Config[A])
    extends ScalaStream[A]:

  private var a: Array[A] = _
  private var b: Array[A] = _
  private var c: Array[A] = _
  private val scalar: A   = config.scalar

  /** A fresh parallel stream over `0 until arraysize`; streams are single-use. */
  inline private def stream =
    java.util.stream.IntStream.range(0, config.options.arraysize).parallel()

  /** Allocates the three arrays and fills them in parallel with `config.init`. */
  override inline def initArrays(): Unit =
    a = Array.ofDim(config.options.arraysize)
    b = Array.ofDim(config.options.arraysize)
    c = Array.ofDim(config.options.arraysize)
    stream.forEach { i =>
      a(i) = config.init._1
      b(i) = config.init._2
      c(i) = config.init._3
    }

  override inline def copy(): Unit  = stream.forEach(i => c(i) = a(i))
  override inline def mul(): Unit   = stream.forEach(i => b(i) = scalar * c(i))
  override inline def add(): Unit   = stream.forEach(i => c(i) = a(i) + b(i))
  override inline def triad(): Unit = stream.forEach(i => a(i) = b(i) + scalar * c(i))

  // nstream accumulates into a: a += b + scalar * c. This is the formula that
  // App.validate uses for its gold values; the previous body computed
  // b * scalar * c instead. The parentheses keep the same evaluation order as
  // the validation code.
  override inline def nstream(): Unit =
    stream.forEach(i => a(i) = a(i) + (b(i) + scalar * c(i)))

  /** Dot product of `a` and `b`, reduced by the stream. */
  override inline def dot(): A =
    // horrible special-case for double, there isn't a mapToFloat so we give up on that
    val cls = classTag[A].runtimeClass
    if java.lang.Double.TYPE == cls then
      stream
        .mapToDouble(i => (a(i) * b(i)).asInstanceOf[Double])
        .reduce(0, (l: Double, r: Double) => l + r)
        .asInstanceOf[A]
    else stream.mapToObj[A](i => a(i) * b(i)).reduce(0.fractional, (l: A, r: A) => l + r)

  /** Immutable copies of the three arrays. */
  override inline def data(): Data[A] = Data(a.to(ArraySeq), b.to(ArraySeq), c.to(ArraySeq))

View File

@ -0,0 +1,36 @@
package scalastream
import scalastream.App.{Config, Data}
import scala.collection.immutable.ArraySeq
import scala.collection.parallel.CollectionConverters._
import scala.reflect.ClassTag
/** BabelStream kernels implemented with Scala parallel collections: every
  * kernel iterates over a parallel range of element indices.
  *
  * @param config benchmark configuration: array size, scalar and initial values
  * @tparam A element type, requires `Fractional` arithmetic and a `ClassTag`
  */
class ParStream[@specialized(Float, Double) A: Fractional: ClassTag](val config: Config[A])
    extends ScalaStream[A]:

  private var a: Array[A] = _
  private var b: Array[A] = _
  private var c: Array[A] = _
  private val scalar: A   = config.scalar

  /** Parallel view of `0 until arraysize`. */
  inline private def indices = (0 until config.options.arraysize).par

  /** Allocates the three arrays and fills them in parallel with `config.init`. */
  override inline def initArrays(): Unit =
    a = Array.ofDim(config.options.arraysize)
    b = Array.ofDim(config.options.arraysize)
    c = Array.ofDim(config.options.arraysize)
    for i <- indices do
      a(i) = config.init._1
      b(i) = config.init._2
      c(i) = config.init._3

  override inline def copy(): Unit  = for i <- indices do c(i) = a(i)
  override inline def mul(): Unit   = for i <- indices do b(i) = scalar * c(i)
  override inline def add(): Unit   = for i <- indices do c(i) = a(i) + b(i)
  override inline def triad(): Unit = for i <- indices do a(i) = b(i) + scalar * c(i)

  // nstream accumulates into a: a += b + scalar * c. This is the formula that
  // App.validate uses for its gold values; the previous body computed
  // b * scalar * c instead.
  override inline def nstream(): Unit =
    for i <- indices do a(i) = a(i) + (b(i) + scalar * c(i))

  /** Dot product of `a` and `b`: per-partition sums combined with `+`. */
  override inline def dot(): A =
    indices.aggregate[A](0.fractional)((acc, i) => acc + (a(i) * b(i)), _ + _)

  /** Immutable copies of the three arrays. */
  override inline def data(): Data[A] = Data(a.to(ArraySeq), b.to(ArraySeq), c.to(ArraySeq))

View File

@ -0,0 +1,31 @@
package scalastream
import scalastream.App.{Config, Data}
import scala.collection.immutable.ArraySeq
import scala.reflect.ClassTag
/** Single-threaded BabelStream kernels written as plain loops over the
  * element indices.
  *
  * @param config benchmark configuration: array size, scalar and initial values
  * @tparam A element type, requires `Fractional` arithmetic and a `ClassTag`
  */
class PlainStream[@specialized(Float, Double) A: Fractional: ClassTag](val config: Config[A])
    extends ScalaStream[A]:

  private var a: Array[A] = _
  private var b: Array[A] = _
  private var c: Array[A] = _
  private val scalar: A   = config.scalar

  /** Allocates the three arrays, each filled with its value from `config.init`. */
  override inline def initArrays(): Unit =
    a = Array.fill(config.options.arraysize)(config.init._1)
    b = Array.fill(config.options.arraysize)(config.init._2)
    c = Array.fill(config.options.arraysize)(config.init._3)

  private inline def indices = 0 until config.options.arraysize

  override inline def copy(): Unit  = for i <- indices do c(i) = a(i)
  override inline def mul(): Unit   = for i <- indices do b(i) = scalar * c(i)
  override inline def add(): Unit   = for i <- indices do c(i) = a(i) + b(i)
  override inline def triad(): Unit = for i <- indices do a(i) = b(i) + (scalar * c(i))

  // nstream accumulates into a: a += b + scalar * c. This is the formula that
  // App.validate uses for its gold values; the previous body computed
  // b * scalar * c instead.
  override inline def nstream(): Unit =
    for i <- indices do a(i) = a(i) + (b(i) + scalar * c(i))

  /** Dot product of `a` and `b`, accumulated left to right. */
  override inline def dot(): A =
    var acc: A = 0.fractional
    for i <- indices do acc = acc + (a(i) * b(i))
    acc

  /** Immutable copies of the three arrays. */
  override inline def data(): Data[A] = Data(a.to(ArraySeq), b.to(ArraySeq), c.to(ArraySeq))

View File

@ -0,0 +1,369 @@
package scalastream
import scalastream.App.{Config, Data, Timings}
import java.util.concurrent.TimeUnit
import scala.collection.immutable.ArraySeq
import scala.collection.mutable.ArrayBuffer
import scala.concurrent.duration.{Duration, FiniteDuration, SECONDS}
import scala.math.{Pi, pow}
import scala.reflect.ClassTag
import scopt.OParser
/** Contract of one benchmark implementation ("device"): the five classic
  * kernels plus nstream, array initialisation, and the timing drivers built
  * on top of them.
  *
  * @tparam A element type of the benchmark arrays
  */
transparent trait ScalaStream[@specialized(Float, Double) A]:

  def config: Config[A]

  def initArrays(): Unit
  def copy(): Unit
  def mul(): Unit
  def add(): Unit
  def triad(): Unit
  def nstream(): Unit
  def dot(): A

  /** Evaluates `f` once and returns the elapsed wall-clock time (measured
    * with `System.nanoTime`) together with the result of `f`.
    */
  transparent inline def timed[R](f: => R): (FiniteDuration, R) =
    val startedAt = System.nanoTime()
    val result    = f
    val elapsed   = System.nanoTime() - startedAt
    (FiniteDuration(elapsed, TimeUnit.NANOSECONDS), result)

  /** Runs copy, mul, add, triad and dot, in that order, `times` times.
    *
    * @return the per-iteration timings of every kernel and the dot product
    *         of the last iteration (zero when `times` is 0)
    */
  inline def runAll(times: Int)(using Fractional[A]): (Timings[Vector[FiniteDuration]], A) =
    // The slots are allocated up front so the measured loop only stores results.
    val copyTimes  = Array.fill[FiniteDuration](times)(Duration.Zero)
    val mulTimes   = Array.fill[FiniteDuration](times)(Duration.Zero)
    val addTimes   = Array.fill[FiniteDuration](times)(Duration.Zero)
    val triadTimes = Array.fill[FiniteDuration](times)(Duration.Zero)
    val dotTimes   = Array.fill[FiniteDuration](times)(Duration.Zero)

    var lastSum: A = 0.fractional

    var iteration = 0
    while iteration < times do
      copyTimes(iteration) = timed(copy())._1
      mulTimes(iteration) = timed(mul())._1
      addTimes(iteration) = timed(add())._1
      triadTimes(iteration) = timed(triad())._1
      val (dotTime, sum) = timed(dot())
      dotTimes(iteration) = dotTime
      lastSum = sum
      iteration += 1

    val timings = Timings(
      copy = copyTimes.toVector,
      mul = mulTimes.toVector,
      add = addTimes.toVector,
      triad = triadTimes.toVector,
      dot = dotTimes.toVector
    )
    (timings, lastSum)

  /** Total time of `times` back-to-back triad runs. */
  def runTriad(times: Int): FiniteDuration =
    timed((0 until times).foreach(_ => triad()))._1

  /** Individual times of `times` nstream runs. */
  def runNStream(times: Int): Vector[FiniteDuration] =
    Vector.tabulate(times)(_ => timed(nstream())._1)

  def data(): Data[A]
/** Minimal arithmetic type class for the benchmark's element types.
  *
  * Implementations supply the conversions and the basic operations; the
  * extension methods below make them available as `n.fractional` on numeric
  * literals and as the usual operators on values of type `A`.
  *
  * @tparam A the element type (Float or Double in this project)
  */
trait Fractional[@specialized(Double, Float) A]:

  def toFractional(f: Float): A
  def toFractional(f: Double): A

  /** Negative, zero or positive when `x` is less than, equal to or greater than `y`. */
  def compare(x: A, y: A): Int
  def add(x: A, y: A): A
  def sub(x: A, y: A): A
  def mul(x: A, y: A): A
  def div(x: A, y: A): A
  def abs(x: A): A

  extension (x: Float) inline def fractional: A  = toFractional(x)
  extension (x: Double) inline def fractional: A = toFractional(x)
  // Int goes through Double, not Float: every Int is exactly representable as
  // a Double, whereas Float rounds values above 2^24 (e.g. large array sizes
  // used for the dot-product gold value). For A = Float the result is the
  // same single rounding as before.
  extension (x: Int) inline def fractional: A  = toFractional(x.toDouble)
  extension (x: Long) inline def fractional: A = toFractional(x.toDouble)

  extension (x: A)
    inline def +(y: A) = add(x, y)
    inline def -(y: A) = sub(x, y)
    inline def *(y: A) = mul(x, y)
    inline def /(y: A) = div(x, y)
    inline def >(y: A) = compare(x, y) > 0
    inline def <(y: A) = compare(x, y) < 0
    inline def abs_    = abs(x)

end Fractional
/** `Fractional` instance for single precision, backed by the primitive operators. */
given FloatFractional: Fractional[Float] with
  inline def toFractional(f: Float): Float     = f
  inline def toFractional(f: Double): Float    = f.toFloat
  inline def compare(x: Float, y: Float): Int  = java.lang.Float.compare(x, y)
  inline def add(x: Float, y: Float): Float    = x + y
  inline def sub(x: Float, y: Float): Float    = x - y
  inline def mul(x: Float, y: Float): Float    = x * y
  inline def div(x: Float, y: Float): Float    = x / y
  inline def abs(x: Float): Float              = x.abs
/** `Fractional` instance for double precision, backed by the primitive operators. */
given DoubleFractional: Fractional[Double] with
  inline def toFractional(f: Float): Double      = f.toDouble
  inline def toFractional(f: Double): Double     = f
  inline def compare(x: Double, y: Double): Int  = java.lang.Double.compare(x, y)
  inline def add(x: Double, y: Double): Double   = x + y
  inline def sub(x: Double, y: Double): Double   = x - y
  inline def mul(x: Double, y: Double): Double   = x * y
  inline def div(x: Double, y: Double): Double   = x / y
  inline def abs(x: Double): Double              = x.abs
object App:
final val Version: String = "3.4.0"
/** Everything a benchmark run needs.
  *
  * @param options   the parsed command-line options
  * @param benchmark which kernels to run
  * @param typeSize  size of one element in bytes (4 for Float, 8 for Double in `main`)
  * @param ulp       base tolerance; `validate` accepts an average error of up to 100 times this value
  * @param scalar    the constant used by the mul, triad and nstream kernels
  * @param init      initial values of the arrays a, b and c
  */
case class Config[@specialized(Double, Float) A](
options: Options,
benchmark: Benchmark,
typeSize: Int,
ulp: A,
scalar: A,
init: (A, A, A)
)
/** One value of type `A` per kernel; `runAll` uses it with a vector of per-iteration durations. */
case class Timings[A](copy: A, mul: A, add: A, triad: A, dot: A)
/** Snapshot of the three benchmark arrays, as returned by `ScalaStream.data()`. */
case class Data[A](@specialized(Double, Float) a: ArraySeq[A], b: ArraySeq[A], c: ArraySeq[A])
/** Command-line options with their defaults.
  *
  * @param list         print the available devices instead of running
  * @param device       index into the device list
  * @param numtimes     number of benchmark iterations
  * @param arraysize    number of elements per array
  * @param float        use Float elements instead of Double
  * @param triad_only   run only the triad kernel
  * @param nstream_only run only the nstream kernel
  * @param csv          print the results as CSV
  * @param mibibytes    report sizes and bandwidth in MiB/GiB instead of MB/GB
  */
case class Options(
list: Boolean = false,
device: Int = 0,
numtimes: Int = 100,
arraysize: Int = 33554432,
float: Boolean = false,
triad_only: Boolean = false,
nstream_only: Boolean = false,
csv: Boolean = false,
mibibytes: Boolean = false
)
/** Defaults and the scopt parser definition for [[Options]]. */
object Options:

  /** The options used when nothing is given on the command line. */
  val Default = Options()

  val builder = OParser.builder[Options]

  /** Parser for all supported command-line options. */
  val parser1 =
    import builder._
    OParser.sequence(
      programName("scala-stream"),
      head("ScalaStream", s"$Version"),
      opt[Unit]('l', "list").text("List available devices").action((_, x) => x.copy(list = true)),
      opt[Int]('d', "device")
        .text(s"Select device at <device>, defaults to ${Default.device}")
        .action((v, x) => x.copy(device = v)),
      opt[Int]('n', "numtimes")
        .text(s"Run the test <numtimes> times (NUM >= 2), defaults to ${Default.numtimes}")
        .validate {
          case n if n >= 2 => success
          // The rejected values are exactly those below 2.
          case n => failure(s"$n < 2")
        }
        .action((n, x) => x.copy(numtimes = n)),
      opt[Int]('a', "arraysize")
        .text(s"Use <arraysize> elements in the array, defaults to ${Default.arraysize}")
        .action((v, x) => x.copy(arraysize = v)),
      opt[Unit]('f', "float")
        .text("Use floats (rather than doubles)")
        .action((_, x) => x.copy(float = true)),
      opt[Unit]('t', "triad_only")
        .text("Only run triad")
        .action((_, x) => x.copy(triad_only = true)),
      // Long form only: the short name 'n' is already taken by --numtimes, and
      // registering it twice makes "-n" ambiguous.
      opt[Unit]("nstream_only")
        .text("Only run nstream")
        .action((_, x) => x.copy(nstream_only = true)),
      opt[Unit]('c', "csv").text("Output as csv table").action((_, x) => x.copy(csv = true)),
      opt[Unit]('m', "mibibytes")
        .text("Use MiB=2^20 for bandwidth calculation (default MB=10^6)")
        .action((_, x) => x.copy(mibibytes = true)),
      help('h', "help").text("prints this usage text")
    )
/** Which kernels a run executes: all five classic kernels, only nstream, or only triad. */
enum Benchmark:
case All, NStream, Triad
/** Adds `seconds`, the duration expressed as a fractional number of seconds. */
implicit class RichDuration(private val d: Duration) extends AnyVal:
  def seconds: Double = d.toUnit(TimeUnit.SECONDS)
/** Checks the arrays (and optionally the dot product) against gold values.
  *
  * The gold values are obtained by replaying the selected kernels on three
  * scalars, starting from `config.init`, once per benchmark iteration.
  * Failures are only reported on stderr; nothing is thrown.
  *
  * @param vec    the arrays produced by the run
  * @param config the configuration the run used
  * @param dotSum the dot product returned by the run, if one was computed
  */
def validate[A: Fractional](vec: Data[A], config: Config[A], dotSum: Option[A] = None): Unit =
  val scalar                   = config.scalar
  var (goldA, goldB, goldC)    = config.init

  var iteration = 0
  while iteration < config.options.numtimes do
    config.benchmark match
      case Benchmark.All =>
        goldC = goldA
        goldB = scalar * goldC
        goldC = goldA + goldB
        goldA = goldB + scalar * goldC
      case Benchmark.Triad =>
        goldA = goldB + scalar * goldC
      case Benchmark.NStream =>
        goldA = goldA + (goldB + scalar * goldC)
    iteration += 1

  val tolerance = config.ulp * (100.fractional)

  // Reports when the mean absolute deviation of `xs` from `expected` exceeds the tolerance.
  def validateXs(name: String, xs: Seq[A], expected: A): Unit =
    val deviations = xs.map(x => (x - expected).abs_)
    val error      = deviations.fold(0.fractional)(_ + _) / xs.size.fractional
    if error > tolerance then
      Console.err.println(s"Validation failed on $name. Average error $error ")

  validateXs("a", vec.a, goldA)
  validateXs("b", vec.b, goldB)
  validateXs("c", vec.c, goldC)

  // The dot product is checked by relative error against a fixed 1e-8 bound.
  for sum <- dotSum do
    val goldSum = (goldA * goldB) * (config.options.arraysize).fractional
    val error   = ((sum - goldSum) / goldSum).abs_
    if error > 1.fractional / 100000000.fractional then
      Console.err.println(
        s"Validation failed on sum. Error $error \nSum was $sum but should be $goldSum"
      )
/** Runs the configured benchmark on one device and prints the report.
  *
  * @param name     human-readable device name, shown in the banner
  * @param config   the benchmark configuration
  * @param mkStream factory that creates the device implementation for `config`
  */
inline def run[A: Fractional: ClassTag](
    name: String,
    config: Config[A],
    mkStream: Config[A] => ScalaStream[A]
): Unit =

  val opt = config.options

  // Byte counts are Long: with the default array size, 3 arrays * 8 bytes *
  // 100 iterations (triad-only mode) is already far beyond Int.MaxValue.
  val arrayBytes: Long = opt.arraysize.toLong * config.typeSize
  val totalBytes: Long = arrayBytes * 3

  val (megaScale, megaSuffix, gigaScale, gigaSuffix) =
    if !opt.mibibytes then (1.0e-6, "MB", 1.0e-9, "GB")
    else (pow(2.0, -20), "MiB", pow(2.0, -30), "GiB")

  if !opt.csv then

    val vendor = System.getProperty("java.vendor")
    val ver    = System.getProperty("java.version")
    val home   = System.getProperty("java.home")
    println(
      s"""BabelStream
         |Version: $Version
         |Implementation: $name; Scala (Java $ver; $vendor; home=$home)""".stripMargin
    )

    val what = config.benchmark match
      case Benchmark.All     => "kernels"
      case Benchmark.Triad   => "triad"
      case Benchmark.NStream => "nstream"
    println(s"Running $what ${opt.numtimes} times")

    if config.benchmark == Benchmark.Triad then println(s"Number of elements: ${opt.arraysize}")

    println(s"Precision: ${if opt.float then "float" else "double"}")
    println(
      f"Array size: ${megaScale * arrayBytes}%.1f $megaSuffix (=${gigaScale * arrayBytes}%.1f $gigaSuffix)"
    )
    println(
      f"Total size: ${megaScale * totalBytes}%.1f $megaSuffix (=${gigaScale * totalBytes}%.1f $gigaSuffix)"
    )

  // Builds one report row (column name -> cell) for a kernel. The first
  // measurement is treated as warm-up and excluded from min/max/average.
  def mkRow(xs: Vector[FiniteDuration], name: String, totalBytes: Long) =
    val tail = xs.tail
    (tail.minOption.map(_.seconds), tail.maxOption.map(_.seconds)) match
      case (Some(min), Some(max)) =>
        val avg  = (tail.foldLeft(Duration.Zero)(_ + _) / tail.size.toDouble).seconds
        val mbps = megaScale * totalBytes.toDouble / min
        if opt.csv then
          Vector(
            "function"   -> name,
            "num_times"  -> opt.numtimes.toString,
            "n_elements" -> opt.arraysize.toString,
            "sizeof"     -> totalBytes.toString,
            s"max_m${if opt.mibibytes then "i" else ""}bytes_per_sec" -> mbps.toString,
            "min_runtime" -> min.toString,
            "max_runtime" -> max.toString,
            "avg_runtime" -> avg.toString
          )
        else
          Vector(
            "Function" -> name,
            s"M${if opt.mibibytes then "i" else ""}Bytes/sec" -> f"$mbps%.3f",
            "Min (sec)" -> f"$min%.5f",
            "Max"       -> f"$max%.5f",
            "Average"   -> f"$avg%.5f"
          )
      case (_, _) => sys.error(s"No min/max element for $name(size=$totalBytes)")

  // Prints the header (taken from the first row) followed by all rows.
  def tabulate(rows: Vector[(String, String)]*): Unit = rows.toList match
    case Nil => sys.error(s"Empty tabulation")
    case header :: _ =>
      val padding = if opt.csv then 0 else 12
      val sep     = if opt.csv then "," else ""
      println(header.map(_._1.padTo(padding, ' ')).mkString(sep))
      println(rows.map(_.map(_._2.padTo(padding, ' ')).mkString(sep)).mkString("\n"))

  val stream = mkStream(config)
  stream.initArrays()

  config.benchmark match
    case Benchmark.All =>
      val (results, sum) = stream.runAll(opt.numtimes)
      validate(stream.data(), config, Some(sum))
      tabulate(
        mkRow(results.copy, "Copy", 2 * arrayBytes),
        mkRow(results.mul, "Mul", 2 * arrayBytes),
        mkRow(results.add, "Add", 3 * arrayBytes),
        mkRow(results.triad, "Triad", 3 * arrayBytes),
        mkRow(results.dot, "Dot", 2 * arrayBytes)
      )
    case Benchmark.NStream =>
      val result = stream.runNStream(opt.numtimes)
      validate(stream.data(), config)
      tabulate(mkRow(result, "Nstream", 4 * arrayBytes))
    case Benchmark.Triad =>
      val results = stream.runTriad(opt.numtimes)
      // Computed in floating point so the product cannot overflow.
      val triadBytes = 3.0 * arrayBytes * opt.numtimes
      // The line below is labelled with the giga suffix, so scale by gigaScale.
      val bandwidth = gigaScale * (triadBytes / results.seconds)
      println(f"Runtime (seconds): ${results.seconds}%.5f")
      println(f"Bandwidth ($gigaSuffix/s): $bandwidth%.3f ")
/** The available implementations, in the order used for the `--device`
  * index: display name paired with a factory for the implementation.
  */
inline def devices[A: Fractional: ClassTag]: Vector[(String, Config[A] => ScalaStream[A])] =
  Vector[(String, Config[A] => ScalaStream[A])](
    ("Scala Parallel Collections", cfg => ParStream(cfg)),
    ("Java 8 Stream", cfg => J8SStream(cfg)),
    ("Threaded", cfg => ThreadStream(cfg)),
    ("Serial", cfg => PlainStream(cfg))
  )
/** Runs the benchmark on the device with index `i`, or prints a message
  * when no such device exists.
  */
inline def runWith[A: Fractional: ClassTag](i: Int, config: Config[A]): Unit =
  val available = devices[A]
  if available.isDefinedAt(i) then
    val (name, mkStream) = available(i)
    run(name, config, mkStream)
  else println(s"Device index out of bounds: $i")
/** Program entry point: parses the command line and either lists the devices
  * or runs the selected benchmark in single or double precision. Exits with
  * status 1 when the arguments cannot be parsed.
  */
def main(args: Array[String]): Unit =

  def handleOpt(opt: Options) =
    val benchmark = (opt.nstream_only, opt.triad_only) match
      case (true, false)  => Benchmark.NStream
      case (false, true)  => Benchmark.Triad
      case (false, false) => Benchmark.All
      case (true, true) =>
        throw new RuntimeException(
          "Both triad and nstream are enabled, pick one or omit both to run all benchmarks"
        )

    if opt.list then
      devices[Float].zipWithIndex.foreach { case ((name, _), i) => println(s"$i: $name") }
    else if opt.float then
      runWith(
        opt.device,
        Config(
          options = opt,
          benchmark = benchmark,
          typeSize = 4, // 32bit
          // Machine epsilon (spacing of floats at 1.0). ulp(Float.MaxValue) is
          // about 2e31, which made the array validation impossible to fail.
          ulp = math.ulp(1.0f),
          scalar = 0.4f,
          init = (0.1f, 0.2f, 0.0f)
        )
      )
    else
      runWith(
        opt.device,
        Config(
          options = opt,
          benchmark = benchmark,
          typeSize = 8, // 64bit
          // Machine epsilon for Double, see the Float branch above.
          ulp = math.ulp(1.0),
          scalar = 0.4,
          init = (0.1, 0.2, 0.0)
        )
      )

  OParser.parse(Options.parser1, args, Options.Default) match
    case Some(config) => handleOpt(config)
    case _            => sys.exit(1)

View File

@ -0,0 +1,68 @@
package scalastream
import net.openhft.affinity.{AffinityStrategies, AffinityThreadFactory}
import scalastream.App.{Config, Data}
import java.util.concurrent.{Callable, Executors}
import scala.collection.immutable.ArraySeq
import scala.reflect.ClassTag
object ThreadStream {}

/** Stream implementation that splits the index range into contiguous chunks and runs every kernel
  * over those chunks on a fixed-size thread pool.
  *
  * @param config benchmark configuration; supplies the array size, the scalar and the initial
  *               values of the three arrays
  */
class ThreadStream[@specialized(Float, Double) A: Fractional: ClassTag](val config: Config[A])
    extends ScalaStream[A]:

  // Allocated in initArrays(); the kernels must not run before it has been called.
  private var a: Array[A] = _
  private var b: Array[A] = _
  private var c: Array[A] = _
  private val scalar: A   = config.scalar

  // One chunk (and one pool thread) per processor reported by the runtime.
  private val chunks: Int = sys.runtime.availableProcessors()
  // NOTE(review): the `true` argument is presumably the factory's daemon flag - confirm against
  // the AffinityThreadFactory API. The pool is never shut down explicitly in this class.
  private val pool = Executors.newFixedThreadPool(
    chunks,
    new AffinityThreadFactory("scala-stream", true, AffinityStrategies.DIFFERENT_CORE)
  )

  // Contiguous index ranges, one task each. The group size is clamped to at least 1 because
  // `grouped` rejects a non-positive size, which `arraysize / chunks` yields whenever the array
  // has fewer elements than there are processors. When arraysize is not a multiple of `chunks`
  // there is one extra, shorter trailing group.
  private val indices = (0 until config.options.arraysize)
    .grouped(math.max(1, config.options.arraysize / chunks))
    .toSeq

  /** Runs `f` for every index, one pool task per chunk, and blocks until all tasks are done.
    *
    * @param c by-name context; evaluated once per task so each chunk gets its own instance
    * @param f body invoked with the task's context and each index of the task's chunk
    * @return the per-task contexts, in chunk order
    */
  private inline def forEachAll[C](c: => C)(f: (C, Int) => Unit): Seq[C] =
    import scala.jdk.CollectionConverters._
    pool
      .invokeAll(
        indices.map { r =>
          { () =>
            val ctx = c
            r.foreach(f(ctx, _))
            ctx
          }: Callable[C]
        }.asJavaCollection
      )
      .asScala
      .map(_.get())
      .toSeq

  /** Allocates the three arrays and fills them in parallel with the configured initial values. */
  override inline def initArrays(): Unit =
    a = Array.ofDim(config.options.arraysize)
    b = Array.ofDim(config.options.arraysize)
    c = Array.ofDim(config.options.arraysize)
    forEachAll(()) { (_, i) =>
      a(i) = config.init._1
      b(i) = config.init._2
      c(i) = config.init._3
    }
    ()

  // Mutable per-task accumulator used by dot().
  class Box(var value: A)

  override inline def copy(): Unit  = { forEachAll(())((_, i) => c(i) = a(i)); () }
  override inline def mul(): Unit   = { forEachAll(())((_, i) => b(i) = scalar * c(i)); () }
  override inline def add(): Unit   = { forEachAll(())((_, i) => c(i) = a(i) + b(i)); () }
  override inline def triad(): Unit = { forEachAll(())((_, i) => a(i) = b(i) + scalar * c(i)); () }

  // nstream accumulates into `a` (a += b + scalar * c), matching the BabelStream reference kernel;
  // the previous body overwrote `a` with the product b * scalar * c instead.
  override inline def nstream(): Unit = {
    forEachAll(())((_, i) => a(i) = a(i) + (b(i) + scalar * c(i))); ()
  }

  /** Dot product of `a` and `b`: each task sums its own chunk, then the partial sums are added. */
  override inline def dot(): A =
    forEachAll(Box(0.fractional))((acc, i) => acc.value = acc.value + (a(i) * b(i)))
      .map(_.value)
      .fold(0.fractional)(_ + _)

  /** Returns the current contents of the three arrays converted to `ArraySeq`s. */
  override inline def data(): Data[A] = Data(a.to(ArraySeq), b.to(ArraySeq), c.to(ArraySeq))