Merge branch 'main' into java
This commit is contained in:
commit
3d2f55da4a
31
.github/workflows/main.yaml
vendored
31
.github/workflows/main.yaml
vendored
@ -3,6 +3,7 @@ on: [push, pull_request]
|
||||
|
||||
|
||||
jobs:
|
||||
|
||||
test-java:
|
||||
runs-on: ubuntu-18.04
|
||||
defaults:
|
||||
@ -15,6 +16,36 @@ jobs:
|
||||
- name: Test run
|
||||
if: ${{ ! cancelled() }}
|
||||
run: java -jar target/java-stream.jar --arraysize 2048
|
||||
|
||||
test-julia:
|
||||
runs-on: ubuntu-18.04
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ./JuliaStream.jl
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Setup project
|
||||
run: julia --project -e 'import Pkg; Pkg.instantiate()'
|
||||
- name: Test run PlainStream.jl
|
||||
if: ${{ ! cancelled() }}
|
||||
run: julia --project src/PlainStream.jl --arraysize 2048
|
||||
- name: Test run ThreadedStream.jl
|
||||
if: ${{ ! cancelled() }}
|
||||
run: julia --threads 2 --project src/ThreadedStream.jl --arraysize 2048
|
||||
- name: Test run DistributedStream.jl (no flag)
|
||||
if: ${{ ! cancelled() }}
|
||||
run: julia --project src/DistributedStream.jl --arraysize 2048
|
||||
- name: Test run DistributedStream.jl (-p 2)
|
||||
if: ${{ ! cancelled() }}
|
||||
run: julia -p 2 --project src/DistributedStream.jl --arraysize 2048
|
||||
- name: Test run CUDAStream.jl
|
||||
if: ${{ ! cancelled() }}
|
||||
run: julia --project src/CUDAStream.jl --list
|
||||
- name: Test run AMDGPUStream.jl
|
||||
if: ${{ ! cancelled() }}
|
||||
run: julia --project src/AMDGPUStream.jl --list
|
||||
|
||||
|
||||
test:
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
|
||||
14
CITATION.cff
Normal file
14
CITATION.cff
Normal file
@ -0,0 +1,14 @@
|
||||
cff-version: 1.1.0
|
||||
message: If you use this software, please cite it as below.
|
||||
authors:
|
||||
- family-names: Deakin
|
||||
given-names: Tom
|
||||
affiliation: University of Bristol
|
||||
website: https://hpc.tomdeakin.com
|
||||
- family-names: McIntosh-Smith
|
||||
given-names: Simon
|
||||
affiliation: University of Bristol
|
||||
website: https://uob-hpc.github.io
|
||||
title: BabelStream
|
||||
version: 3.4
|
||||
date-released: 2019-04-10
|
||||
2
JuliaStream.jl/.JuliaFormatter.toml
Normal file
2
JuliaStream.jl/.JuliaFormatter.toml
Normal file
@ -0,0 +1,2 @@
|
||||
indent = 2
|
||||
margin = 100
|
||||
5
JuliaStream.jl/.gitignore
vendored
Normal file
5
JuliaStream.jl/.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
*.jl.cov
|
||||
*.jl.*.cov
|
||||
*.jl.mem
|
||||
/docs/build/
|
||||
/docs/Manifest.toml
|
||||
415
JuliaStream.jl/AMDGPU/Manifest.toml
Normal file
415
JuliaStream.jl/AMDGPU/Manifest.toml
Normal file
@ -0,0 +1,415 @@
|
||||
# This file is machine-generated - editing it directly is not advised
|
||||
|
||||
[[AMDGPU]]
|
||||
deps = ["AbstractFFTs", "Adapt", "BinaryProvider", "CEnum", "GPUArrays", "GPUCompiler", "HIP_jll", "LLVM", "Libdl", "LinearAlgebra", "MacroTools", "Pkg", "Printf", "ROCmDeviceLibs_jll", "Random", "Requires", "Setfield", "hsa_rocr_jll"]
|
||||
git-tree-sha1 = "d64c97447a753cfbf0158d6c7be513f34526d559"
|
||||
uuid = "21141c5a-9bdb-4563-92ae-f87d6854732e"
|
||||
version = "0.2.12"
|
||||
|
||||
[[AbstractFFTs]]
|
||||
deps = ["LinearAlgebra"]
|
||||
git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0"
|
||||
uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
|
||||
version = "1.0.1"
|
||||
|
||||
[[Adapt]]
|
||||
deps = ["LinearAlgebra"]
|
||||
git-tree-sha1 = "84918055d15b3114ede17ac6a7182f68870c16f7"
|
||||
uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
|
||||
version = "3.3.1"
|
||||
|
||||
[[ArgParse]]
|
||||
deps = ["Logging", "TextWrap"]
|
||||
git-tree-sha1 = "3102bce13da501c9104df33549f511cd25264d7d"
|
||||
uuid = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
|
||||
version = "1.1.4"
|
||||
|
||||
[[ArgTools]]
|
||||
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
|
||||
|
||||
[[Artifacts]]
|
||||
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
|
||||
|
||||
[[Base64]]
|
||||
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
|
||||
|
||||
[[BinaryProvider]]
|
||||
deps = ["Libdl", "Logging", "SHA"]
|
||||
git-tree-sha1 = "ecdec412a9abc8db54c0efc5548c64dfce072058"
|
||||
uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
|
||||
version = "0.5.10"
|
||||
|
||||
[[Bzip2_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "19a35467a82e236ff51bc17a3a44b69ef35185a2"
|
||||
uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0"
|
||||
version = "1.0.8+0"
|
||||
|
||||
[[CEnum]]
|
||||
git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9"
|
||||
uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
|
||||
version = "0.4.1"
|
||||
|
||||
[[ConstructionBase]]
|
||||
deps = ["LinearAlgebra"]
|
||||
git-tree-sha1 = "f74e9d5388b8620b4cee35d4c5a618dd4dc547f4"
|
||||
uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
|
||||
version = "1.3.0"
|
||||
|
||||
[[Dates]]
|
||||
deps = ["Printf"]
|
||||
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
|
||||
|
||||
[[Downloads]]
|
||||
deps = ["ArgTools", "LibCURL", "NetworkOptions"]
|
||||
uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
|
||||
|
||||
[[Elfutils_jll]]
|
||||
deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "XZ_jll", "Zlib_jll", "argp_standalone_jll", "fts_jll", "obstack_jll"]
|
||||
git-tree-sha1 = "8f9fcde6d89b0a3ca51cb2028beab462705c5436"
|
||||
uuid = "ab5a07f8-06af-567f-a878-e8bb879eba5a"
|
||||
version = "0.182.0+0"
|
||||
|
||||
[[ExprTools]]
|
||||
git-tree-sha1 = "b7e3d17636b348f005f11040025ae8c6f645fe92"
|
||||
uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
|
||||
version = "0.1.6"
|
||||
|
||||
[[Future]]
|
||||
deps = ["Random"]
|
||||
uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
|
||||
|
||||
[[GPUArrays]]
|
||||
deps = ["AbstractFFTs", "Adapt", "LinearAlgebra", "Printf", "Random", "Serialization", "Statistics"]
|
||||
git-tree-sha1 = "ececbf05f8904c92814bdbd0aafd5540b0bf2e9a"
|
||||
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
|
||||
version = "7.0.1"
|
||||
|
||||
[[GPUCompiler]]
|
||||
deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"]
|
||||
git-tree-sha1 = "4ed2616d5e656c8716736b64da86755467f26cf5"
|
||||
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
|
||||
version = "0.12.9"
|
||||
|
||||
[[HIP_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "ROCmCompilerSupport_jll", "ROCmDeviceLibs_jll", "ROCmOpenCLRuntime_jll", "hsa_rocr_jll"]
|
||||
git-tree-sha1 = "5097d8f7b6842156ab0928371b3d03fefd8decab"
|
||||
uuid = "2696aab5-0948-5276-aa9a-2a86a37016b8"
|
||||
version = "4.0.0+1"
|
||||
|
||||
[[InteractiveUtils]]
|
||||
deps = ["Markdown"]
|
||||
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
|
||||
|
||||
[[JLLWrappers]]
|
||||
deps = ["Preferences"]
|
||||
git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
|
||||
uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
|
||||
version = "1.3.0"
|
||||
|
||||
[[LLVM]]
|
||||
deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"]
|
||||
git-tree-sha1 = "23a47d417a3cd9c2e73c854bac7dd4731c105ef7"
|
||||
uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
|
||||
version = "4.4.0"
|
||||
|
||||
[[LLVMExtra_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "9c360e5ce980b88bb31a7b086dbb19469008154b"
|
||||
uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab"
|
||||
version = "0.0.10+0"
|
||||
|
||||
[[LibCURL]]
|
||||
deps = ["LibCURL_jll", "MozillaCACerts_jll"]
|
||||
uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
|
||||
|
||||
[[LibCURL_jll]]
|
||||
deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
|
||||
uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
|
||||
|
||||
[[LibGit2]]
|
||||
deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
|
||||
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
|
||||
|
||||
[[LibSSH2_jll]]
|
||||
deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
|
||||
uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
|
||||
|
||||
[[Libdl]]
|
||||
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
|
||||
|
||||
[[Libgcrypt_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgpg_error_jll", "Pkg"]
|
||||
git-tree-sha1 = "64613c82a59c120435c067c2b809fc61cf5166ae"
|
||||
uuid = "d4300ac3-e22c-5743-9152-c294e39db1e4"
|
||||
version = "1.8.7+0"
|
||||
|
||||
[[Libglvnd_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll"]
|
||||
git-tree-sha1 = "7739f837d6447403596a75d19ed01fd08d6f56bf"
|
||||
uuid = "7e76a0d4-f3c7-5321-8279-8d96eeed0f29"
|
||||
version = "1.3.0+3"
|
||||
|
||||
[[Libgpg_error_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "c333716e46366857753e273ce6a69ee0945a6db9"
|
||||
uuid = "7add5ba3-2f88-524e-9cd5-f83b8a55f7b8"
|
||||
version = "1.42.0+0"
|
||||
|
||||
[[Libiconv_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "42b62845d70a619f063a7da093d995ec8e15e778"
|
||||
uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531"
|
||||
version = "1.16.1+1"
|
||||
|
||||
[[LinearAlgebra]]
|
||||
deps = ["Libdl"]
|
||||
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
|
||||
|
||||
[[Logging]]
|
||||
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
|
||||
|
||||
[[MacroTools]]
|
||||
deps = ["Markdown", "Random"]
|
||||
git-tree-sha1 = "0fb723cd8c45858c22169b2e42269e53271a6df7"
|
||||
uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
|
||||
version = "0.5.7"
|
||||
|
||||
[[Markdown]]
|
||||
deps = ["Base64"]
|
||||
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
|
||||
|
||||
[[MbedTLS_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
|
||||
|
||||
[[MozillaCACerts_jll]]
|
||||
uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
|
||||
|
||||
[[NUMA_jll]]
|
||||
deps = ["Libdl", "Pkg"]
|
||||
git-tree-sha1 = "778f9bd14400cff2c32ed357e12766ac0e3d766e"
|
||||
uuid = "7f51dc2b-bb24-59f8-b771-bb1490e4195d"
|
||||
version = "2.0.13+1"
|
||||
|
||||
[[NetworkOptions]]
|
||||
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
|
||||
|
||||
[[OrderedCollections]]
|
||||
git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
|
||||
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
|
||||
version = "1.4.1"
|
||||
|
||||
[[Parameters]]
|
||||
deps = ["OrderedCollections", "UnPack"]
|
||||
git-tree-sha1 = "2276ac65f1e236e0a6ea70baff3f62ad4c625345"
|
||||
uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
|
||||
version = "0.12.2"
|
||||
|
||||
[[Pkg]]
|
||||
deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
|
||||
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
|
||||
|
||||
[[Preferences]]
|
||||
deps = ["TOML"]
|
||||
git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a"
|
||||
uuid = "21216c6a-2e73-6563-6e65-726566657250"
|
||||
version = "1.2.2"
|
||||
|
||||
[[Printf]]
|
||||
deps = ["Unicode"]
|
||||
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
|
||||
|
||||
[[REPL]]
|
||||
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
|
||||
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
|
||||
|
||||
[[ROCmCompilerSupport_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "ROCmDeviceLibs_jll", "hsa_rocr_jll"]
|
||||
git-tree-sha1 = "56ddcfb5d8b60c9f8c1bc619886f8d363fd1926d"
|
||||
uuid = "8fbdd1d2-db62-5cd0-981e-905da1486e17"
|
||||
version = "4.0.0+1"
|
||||
|
||||
[[ROCmDeviceLibs_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"]
|
||||
git-tree-sha1 = "d764f0f28b5af89aa004871a6a38e5d061f77257"
|
||||
uuid = "873c0968-716b-5aa7-bb8d-d1e2e2aeff2d"
|
||||
version = "4.0.0+0"
|
||||
|
||||
[[ROCmOpenCLRuntime_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pkg", "ROCmCompilerSupport_jll", "ROCmDeviceLibs_jll", "Xorg_libX11_jll", "Xorg_xorgproto_jll", "hsa_rocr_jll"]
|
||||
git-tree-sha1 = "f9e3e2cb40a7990535efa7da9b9dd0e0b458a973"
|
||||
uuid = "10ae2a08-2eea-53f8-8c20-eec175020e9f"
|
||||
version = "4.0.0+1"
|
||||
|
||||
[[Random]]
|
||||
deps = ["Serialization"]
|
||||
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
|
||||
|
||||
[[Requires]]
|
||||
deps = ["UUIDs"]
|
||||
git-tree-sha1 = "4036a3bd08ac7e968e27c203d45f5fff15020621"
|
||||
uuid = "ae029012-a4dd-5104-9daa-d747884805df"
|
||||
version = "1.1.3"
|
||||
|
||||
[[SHA]]
|
||||
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
|
||||
|
||||
[[Serialization]]
|
||||
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
|
||||
|
||||
[[Setfield]]
|
||||
deps = ["ConstructionBase", "Future", "MacroTools", "Requires"]
|
||||
git-tree-sha1 = "fca29e68c5062722b5b4435594c3d1ba557072a3"
|
||||
uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46"
|
||||
version = "0.7.1"
|
||||
|
||||
[[Sockets]]
|
||||
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
|
||||
|
||||
[[SparseArrays]]
|
||||
deps = ["LinearAlgebra", "Random"]
|
||||
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
|
||||
|
||||
[[Statistics]]
|
||||
deps = ["LinearAlgebra", "SparseArrays"]
|
||||
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
|
||||
|
||||
[[TOML]]
|
||||
deps = ["Dates"]
|
||||
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
|
||||
|
||||
[[Tar]]
|
||||
deps = ["ArgTools", "SHA"]
|
||||
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
|
||||
|
||||
[[TextWrap]]
|
||||
git-tree-sha1 = "9250ef9b01b66667380cf3275b3f7488d0e25faf"
|
||||
uuid = "b718987f-49a8-5099-9789-dcd902bef87d"
|
||||
version = "1.0.1"
|
||||
|
||||
[[TimerOutputs]]
|
||||
deps = ["ExprTools", "Printf"]
|
||||
git-tree-sha1 = "209a8326c4f955e2442c07b56029e88bb48299c7"
|
||||
uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
|
||||
version = "0.5.12"
|
||||
|
||||
[[UUIDs]]
|
||||
deps = ["Random", "SHA"]
|
||||
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
|
||||
|
||||
[[UnPack]]
|
||||
git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b"
|
||||
uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
|
||||
version = "1.0.2"
|
||||
|
||||
[[Unicode]]
|
||||
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
|
||||
|
||||
[[XML2_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"]
|
||||
git-tree-sha1 = "1acf5bdf07aa0907e0a37d3718bb88d4b687b74a"
|
||||
uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a"
|
||||
version = "2.9.12+0"
|
||||
|
||||
[[XSLT_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgcrypt_jll", "Libgpg_error_jll", "Libiconv_jll", "Pkg", "XML2_jll", "Zlib_jll"]
|
||||
git-tree-sha1 = "91844873c4085240b95e795f692c4cec4d805f8a"
|
||||
uuid = "aed1982a-8fda-507f-9586-7b0439959a61"
|
||||
version = "1.1.34+0"
|
||||
|
||||
[[XZ_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "a921669cd9a45c23031fd4eb904f5cc3d20de415"
|
||||
uuid = "ffd25f8a-64ca-5728-b0f7-c24cf3aae800"
|
||||
version = "5.2.5+2"
|
||||
|
||||
[[Xorg_libX11_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll", "Xorg_xtrans_jll"]
|
||||
git-tree-sha1 = "5be649d550f3f4b95308bf0183b82e2582876527"
|
||||
uuid = "4f6342f7-b3d2-589e-9d20-edeb45f2b2bc"
|
||||
version = "1.6.9+4"
|
||||
|
||||
[[Xorg_libXau_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "4e490d5c960c314f33885790ed410ff3a94ce67e"
|
||||
uuid = "0c0b7dd1-d40b-584c-a123-a41640f87eec"
|
||||
version = "1.0.9+4"
|
||||
|
||||
[[Xorg_libXdmcp_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "4fe47bd2247248125c428978740e18a681372dd4"
|
||||
uuid = "a3789734-cfe1-5b06-b2d0-1dd0d9d62d05"
|
||||
version = "1.1.3+4"
|
||||
|
||||
[[Xorg_libXext_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"]
|
||||
git-tree-sha1 = "b7c0aa8c376b31e4852b360222848637f481f8c3"
|
||||
uuid = "1082639a-0dae-5f34-9b06-72781eeb8cb3"
|
||||
version = "1.3.4+4"
|
||||
|
||||
[[Xorg_libpthread_stubs_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "6783737e45d3c59a4a4c4091f5f88cdcf0908cbb"
|
||||
uuid = "14d82f49-176c-5ed1-bb49-ad3f5cbd8c74"
|
||||
version = "0.1.0+3"
|
||||
|
||||
[[Xorg_libxcb_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "XSLT_jll", "Xorg_libXau_jll", "Xorg_libXdmcp_jll", "Xorg_libpthread_stubs_jll"]
|
||||
git-tree-sha1 = "daf17f441228e7a3833846cd048892861cff16d6"
|
||||
uuid = "c7cfdc94-dc32-55de-ac96-5a1b8d977c5b"
|
||||
version = "1.13.0+3"
|
||||
|
||||
[[Xorg_xorgproto_jll]]
|
||||
deps = ["Libdl", "Pkg"]
|
||||
git-tree-sha1 = "9a9eb8ce756fe0bca01b4be16da770e18d264972"
|
||||
uuid = "c4d99508-4286-5418-9131-c86396af500b"
|
||||
version = "2019.2.0+2"
|
||||
|
||||
[[Xorg_xtrans_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "79c31e7844f6ecf779705fbc12146eb190b7d845"
|
||||
uuid = "c5fb5394-a638-5e4d-96e5-b29de1b5cf10"
|
||||
version = "1.4.0+3"
|
||||
|
||||
[[Zlib_jll]]
|
||||
deps = ["Libdl"]
|
||||
uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
|
||||
|
||||
[[argp_standalone_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "feaf9f6293003c2bf53056fd6930d677ed340b34"
|
||||
uuid = "c53206cc-00f7-50bf-ad1e-3ae1f6e49bc3"
|
||||
version = "1.3.1+0"
|
||||
|
||||
[[fts_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "78732b942383d2cb521df8a1a0814911144e663d"
|
||||
uuid = "d65627f6-89bd-53e8-8ab5-8b75ff535eee"
|
||||
version = "1.2.7+1"
|
||||
|
||||
[[hsa_rocr_jll]]
|
||||
deps = ["Artifacts", "Elfutils_jll", "JLLWrappers", "Libdl", "NUMA_jll", "Pkg", "Zlib_jll", "hsakmt_roct_jll"]
|
||||
git-tree-sha1 = "df8d73efec8b1e53ad527d208f5343c0368f0fcd"
|
||||
uuid = "dd59ff1a-a01a-568d-8b29-0669330f116a"
|
||||
version = "4.0.0+0"
|
||||
|
||||
[[hsakmt_roct_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "NUMA_jll", "Pkg"]
|
||||
git-tree-sha1 = "80e0c9940e15cfd6f1f1e9d9f3953ec4d48d3d4a"
|
||||
uuid = "1cecccd7-a9b6-5045-9cdc-a44c19b16d76"
|
||||
version = "4.0.0+0"
|
||||
|
||||
[[nghttp2_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
|
||||
|
||||
[[obstack_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "1c4a6b66e934fc6db4649cb2910c72f53bbfea7e"
|
||||
uuid = "c88a4935-d25e-5644-aacc-5db6f1b8ef79"
|
||||
version = "1.2.2+0"
|
||||
|
||||
[[p7zip_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
|
||||
7
JuliaStream.jl/AMDGPU/Project.toml
Normal file
7
JuliaStream.jl/AMDGPU/Project.toml
Normal file
@ -0,0 +1,7 @@
|
||||
[deps]
|
||||
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
|
||||
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
|
||||
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
|
||||
|
||||
[compat]
|
||||
julia = "1.6"
|
||||
316
JuliaStream.jl/CUDA/Manifest.toml
Normal file
316
JuliaStream.jl/CUDA/Manifest.toml
Normal file
@ -0,0 +1,316 @@
|
||||
# This file is machine-generated - editing it directly is not advised
|
||||
|
||||
[[AbstractFFTs]]
|
||||
deps = ["LinearAlgebra"]
|
||||
git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0"
|
||||
uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
|
||||
version = "1.0.1"
|
||||
|
||||
[[Adapt]]
|
||||
deps = ["LinearAlgebra"]
|
||||
git-tree-sha1 = "84918055d15b3114ede17ac6a7182f68870c16f7"
|
||||
uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
|
||||
version = "3.3.1"
|
||||
|
||||
[[ArgParse]]
|
||||
deps = ["Logging", "TextWrap"]
|
||||
git-tree-sha1 = "3102bce13da501c9104df33549f511cd25264d7d"
|
||||
uuid = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
|
||||
version = "1.1.4"
|
||||
|
||||
[[ArgTools]]
|
||||
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
|
||||
|
||||
[[Artifacts]]
|
||||
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
|
||||
|
||||
[[BFloat16s]]
|
||||
deps = ["LinearAlgebra", "Test"]
|
||||
git-tree-sha1 = "4af69e205efc343068dc8722b8dfec1ade89254a"
|
||||
uuid = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
|
||||
version = "0.1.0"
|
||||
|
||||
[[Base64]]
|
||||
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
|
||||
|
||||
[[CEnum]]
|
||||
git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9"
|
||||
uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
|
||||
version = "0.4.1"
|
||||
|
||||
[[CUDA]]
|
||||
deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CompilerSupportLibraries_jll", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "SpecialFunctions", "TimerOutputs"]
|
||||
git-tree-sha1 = "c583f3ccdce071b8a8bce9bf3d5d5409eaf36d2b"
|
||||
uuid = "052768ef-5323-5732-b1bb-66c8b64840ba"
|
||||
version = "3.4.1"
|
||||
|
||||
[[ChainRulesCore]]
|
||||
deps = ["Compat", "LinearAlgebra", "SparseArrays"]
|
||||
git-tree-sha1 = "bdc0937269321858ab2a4f288486cb258b9a0af7"
|
||||
uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
|
||||
version = "1.3.0"
|
||||
|
||||
[[Compat]]
|
||||
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
|
||||
git-tree-sha1 = "727e463cfebd0c7b999bbf3e9e7e16f254b94193"
|
||||
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
|
||||
version = "3.34.0"
|
||||
|
||||
[[CompilerSupportLibraries_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
|
||||
|
||||
[[Dates]]
|
||||
deps = ["Printf"]
|
||||
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
|
||||
|
||||
[[DelimitedFiles]]
|
||||
deps = ["Mmap"]
|
||||
uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
|
||||
|
||||
[[Distributed]]
|
||||
deps = ["Random", "Serialization", "Sockets"]
|
||||
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
|
||||
|
||||
[[DocStringExtensions]]
|
||||
deps = ["LibGit2"]
|
||||
git-tree-sha1 = "a32185f5428d3986f47c2ab78b1f216d5e6cc96f"
|
||||
uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
|
||||
version = "0.8.5"
|
||||
|
||||
[[Downloads]]
|
||||
deps = ["ArgTools", "LibCURL", "NetworkOptions"]
|
||||
uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
|
||||
|
||||
[[ExprTools]]
|
||||
git-tree-sha1 = "b7e3d17636b348f005f11040025ae8c6f645fe92"
|
||||
uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
|
||||
version = "0.1.6"
|
||||
|
||||
[[GPUArrays]]
|
||||
deps = ["Adapt", "LinearAlgebra", "Printf", "Random", "Serialization", "Statistics"]
|
||||
git-tree-sha1 = "8fac1cf7d6ce0f2249c7acaf25d22e1e85c4a07f"
|
||||
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
|
||||
version = "8.0.2"
|
||||
|
||||
[[GPUCompiler]]
|
||||
deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"]
|
||||
git-tree-sha1 = "4ed2616d5e656c8716736b64da86755467f26cf5"
|
||||
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
|
||||
version = "0.12.9"
|
||||
|
||||
[[InteractiveUtils]]
|
||||
deps = ["Markdown"]
|
||||
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
|
||||
|
||||
[[IrrationalConstants]]
|
||||
git-tree-sha1 = "f76424439413893a832026ca355fe273e93bce94"
|
||||
uuid = "92d709cd-6900-40b7-9082-c6be49f344b6"
|
||||
version = "0.1.0"
|
||||
|
||||
[[JLLWrappers]]
|
||||
deps = ["Preferences"]
|
||||
git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
|
||||
uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
|
||||
version = "1.3.0"
|
||||
|
||||
[[LLVM]]
|
||||
deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"]
|
||||
git-tree-sha1 = "23a47d417a3cd9c2e73c854bac7dd4731c105ef7"
|
||||
uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
|
||||
version = "4.4.0"
|
||||
|
||||
[[LLVMExtra_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "9c360e5ce980b88bb31a7b086dbb19469008154b"
|
||||
uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab"
|
||||
version = "0.0.10+0"
|
||||
|
||||
[[LazyArtifacts]]
|
||||
deps = ["Artifacts", "Pkg"]
|
||||
uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
|
||||
|
||||
[[LibCURL]]
|
||||
deps = ["LibCURL_jll", "MozillaCACerts_jll"]
|
||||
uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
|
||||
|
||||
[[LibCURL_jll]]
|
||||
deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
|
||||
uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
|
||||
|
||||
[[LibGit2]]
|
||||
deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
|
||||
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
|
||||
|
||||
[[LibSSH2_jll]]
|
||||
deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
|
||||
uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
|
||||
|
||||
[[Libdl]]
|
||||
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
|
||||
|
||||
[[LinearAlgebra]]
|
||||
deps = ["Libdl"]
|
||||
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
|
||||
|
||||
[[LogExpFunctions]]
|
||||
deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"]
|
||||
git-tree-sha1 = "3d682c07e6dd250ed082f883dc88aee7996bf2cc"
|
||||
uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
|
||||
version = "0.3.0"
|
||||
|
||||
[[Logging]]
|
||||
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
|
||||
|
||||
[[Markdown]]
|
||||
deps = ["Base64"]
|
||||
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
|
||||
|
||||
[[MbedTLS_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
|
||||
|
||||
[[Mmap]]
|
||||
uuid = "a63ad114-7e13-5084-954f-fe012c677804"
|
||||
|
||||
[[MozillaCACerts_jll]]
|
||||
uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
|
||||
|
||||
[[NetworkOptions]]
|
||||
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
|
||||
|
||||
[[OpenSpecFun_jll]]
|
||||
deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1"
|
||||
uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
|
||||
version = "0.5.5+0"
|
||||
|
||||
[[OrderedCollections]]
|
||||
git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
|
||||
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
|
||||
version = "1.4.1"
|
||||
|
||||
[[Parameters]]
|
||||
deps = ["OrderedCollections", "UnPack"]
|
||||
git-tree-sha1 = "2276ac65f1e236e0a6ea70baff3f62ad4c625345"
|
||||
uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
|
||||
version = "0.12.2"
|
||||
|
||||
[[Pkg]]
|
||||
deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
|
||||
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
|
||||
|
||||
[[Preferences]]
|
||||
deps = ["TOML"]
|
||||
git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a"
|
||||
uuid = "21216c6a-2e73-6563-6e65-726566657250"
|
||||
version = "1.2.2"
|
||||
|
||||
[[Printf]]
|
||||
deps = ["Unicode"]
|
||||
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
|
||||
|
||||
[[REPL]]
|
||||
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
|
||||
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
|
||||
|
||||
[[Random]]
|
||||
deps = ["Serialization"]
|
||||
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
|
||||
|
||||
[[Random123]]
|
||||
deps = ["Libdl", "Random", "RandomNumbers"]
|
||||
git-tree-sha1 = "0e8b146557ad1c6deb1367655e052276690e71a3"
|
||||
uuid = "74087812-796a-5b5d-8853-05524746bad3"
|
||||
version = "1.4.2"
|
||||
|
||||
[[RandomNumbers]]
|
||||
deps = ["Random", "Requires"]
|
||||
git-tree-sha1 = "043da614cc7e95c703498a491e2c21f58a2b8111"
|
||||
uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143"
|
||||
version = "1.5.3"
|
||||
|
||||
[[Reexport]]
|
||||
git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b"
|
||||
uuid = "189a3867-3050-52da-a836-e630ba90ab69"
|
||||
version = "1.2.2"
|
||||
|
||||
[[Requires]]
|
||||
deps = ["UUIDs"]
|
||||
git-tree-sha1 = "4036a3bd08ac7e968e27c203d45f5fff15020621"
|
||||
uuid = "ae029012-a4dd-5104-9daa-d747884805df"
|
||||
version = "1.1.3"
|
||||
|
||||
[[SHA]]
|
||||
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
|
||||
|
||||
[[Serialization]]
|
||||
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
|
||||
|
||||
[[SharedArrays]]
|
||||
deps = ["Distributed", "Mmap", "Random", "Serialization"]
|
||||
uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
|
||||
|
||||
[[Sockets]]
|
||||
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
|
||||
|
||||
[[SparseArrays]]
|
||||
deps = ["LinearAlgebra", "Random"]
|
||||
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
|
||||
|
||||
[[SpecialFunctions]]
|
||||
deps = ["ChainRulesCore", "LogExpFunctions", "OpenSpecFun_jll"]
|
||||
git-tree-sha1 = "a322a9493e49c5f3a10b50df3aedaf1cdb3244b7"
|
||||
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
|
||||
version = "1.6.1"
|
||||
|
||||
[[Statistics]]
|
||||
deps = ["LinearAlgebra", "SparseArrays"]
|
||||
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
|
||||
|
||||
[[TOML]]
|
||||
deps = ["Dates"]
|
||||
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
|
||||
|
||||
[[Tar]]
|
||||
deps = ["ArgTools", "SHA"]
|
||||
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
|
||||
|
||||
[[Test]]
|
||||
deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
|
||||
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
||||
|
||||
[[TextWrap]]
|
||||
git-tree-sha1 = "9250ef9b01b66667380cf3275b3f7488d0e25faf"
|
||||
uuid = "b718987f-49a8-5099-9789-dcd902bef87d"
|
||||
version = "1.0.1"
|
||||
|
||||
[[TimerOutputs]]
|
||||
deps = ["ExprTools", "Printf"]
|
||||
git-tree-sha1 = "209a8326c4f955e2442c07b56029e88bb48299c7"
|
||||
uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
|
||||
version = "0.5.12"
|
||||
|
||||
[[UUIDs]]
|
||||
deps = ["Random", "SHA"]
|
||||
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
|
||||
|
||||
[[UnPack]]
|
||||
git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b"
|
||||
uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
|
||||
version = "1.0.2"
|
||||
|
||||
[[Unicode]]
|
||||
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
|
||||
|
||||
[[Zlib_jll]]
|
||||
deps = ["Libdl"]
|
||||
uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
|
||||
|
||||
[[nghttp2_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
|
||||
|
||||
[[p7zip_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
|
||||
7
JuliaStream.jl/CUDA/Project.toml
Normal file
7
JuliaStream.jl/CUDA/Project.toml
Normal file
@ -0,0 +1,7 @@
|
||||
[deps]
|
||||
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
|
||||
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
|
||||
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
|
||||
|
||||
[compat]
|
||||
julia = "1.6"
|
||||
547
JuliaStream.jl/KernelAbstractions/Manifest.toml
Normal file
547
JuliaStream.jl/KernelAbstractions/Manifest.toml
Normal file
@ -0,0 +1,547 @@
|
||||
# This file is machine-generated - editing it directly is not advised
|
||||
|
||||
[[AMDGPU]]
|
||||
deps = ["AbstractFFTs", "Adapt", "BinaryProvider", "CEnum", "GPUArrays", "GPUCompiler", "HIP_jll", "LLVM", "Libdl", "LinearAlgebra", "MacroTools", "Pkg", "Printf", "ROCmDeviceLibs_jll", "Random", "Requires", "Setfield", "hsa_rocr_jll"]
|
||||
git-tree-sha1 = "d64c97447a753cfbf0158d6c7be513f34526d559"
|
||||
uuid = "21141c5a-9bdb-4563-92ae-f87d6854732e"
|
||||
version = "0.2.12"
|
||||
|
||||
[[AbstractFFTs]]
|
||||
deps = ["LinearAlgebra"]
|
||||
git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0"
|
||||
uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
|
||||
version = "1.0.1"
|
||||
|
||||
[[Adapt]]
|
||||
deps = ["LinearAlgebra"]
|
||||
git-tree-sha1 = "84918055d15b3114ede17ac6a7182f68870c16f7"
|
||||
uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
|
||||
version = "3.3.1"
|
||||
|
||||
[[ArgParse]]
|
||||
deps = ["Logging", "TextWrap"]
|
||||
git-tree-sha1 = "3102bce13da501c9104df33549f511cd25264d7d"
|
||||
uuid = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
|
||||
version = "1.1.4"
|
||||
|
||||
[[ArgTools]]
|
||||
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
|
||||
|
||||
[[Artifacts]]
|
||||
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
|
||||
|
||||
[[BFloat16s]]
|
||||
deps = ["LinearAlgebra", "Test"]
|
||||
git-tree-sha1 = "4af69e205efc343068dc8722b8dfec1ade89254a"
|
||||
uuid = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
|
||||
version = "0.1.0"
|
||||
|
||||
[[Base64]]
|
||||
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
|
||||
|
||||
[[BinaryProvider]]
|
||||
deps = ["Libdl", "Logging", "SHA"]
|
||||
git-tree-sha1 = "ecdec412a9abc8db54c0efc5548c64dfce072058"
|
||||
uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
|
||||
version = "0.5.10"
|
||||
|
||||
[[Bzip2_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "19a35467a82e236ff51bc17a3a44b69ef35185a2"
|
||||
uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0"
|
||||
version = "1.0.8+0"
|
||||
|
||||
[[CEnum]]
|
||||
git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9"
|
||||
uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
|
||||
version = "0.4.1"
|
||||
|
||||
[[CUDA]]
|
||||
deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CompilerSupportLibraries_jll", "DataStructures", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "SpecialFunctions", "TimerOutputs"]
|
||||
git-tree-sha1 = "5e696e37e51b01ae07bd9f700afe6cbd55250bce"
|
||||
uuid = "052768ef-5323-5732-b1bb-66c8b64840ba"
|
||||
version = "3.3.4"
|
||||
|
||||
[[CUDAKernels]]
|
||||
deps = ["Adapt", "CUDA", "Cassette", "KernelAbstractions", "SpecialFunctions", "StaticArrays"]
|
||||
git-tree-sha1 = "81f76297b63c67723b1d60f5e7e002ae3393974b"
|
||||
uuid = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
|
||||
version = "0.3.0"
|
||||
|
||||
[[Cassette]]
|
||||
git-tree-sha1 = "b4b1d61ebbae2bc69a45e3a6b8439b4e411bc131"
|
||||
uuid = "7057c7e9-c182-5462-911a-8362d720325c"
|
||||
version = "0.3.8"
|
||||
|
||||
[[ChainRulesCore]]
|
||||
deps = ["Compat", "LinearAlgebra", "SparseArrays"]
|
||||
git-tree-sha1 = "bdc0937269321858ab2a4f288486cb258b9a0af7"
|
||||
uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
|
||||
version = "1.3.0"
|
||||
|
||||
[[Compat]]
|
||||
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
|
||||
git-tree-sha1 = "727e463cfebd0c7b999bbf3e9e7e16f254b94193"
|
||||
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
|
||||
version = "3.34.0"
|
||||
|
||||
[[CompilerSupportLibraries_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
|
||||
|
||||
[[ConstructionBase]]
|
||||
deps = ["LinearAlgebra"]
|
||||
git-tree-sha1 = "f74e9d5388b8620b4cee35d4c5a618dd4dc547f4"
|
||||
uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
|
||||
version = "1.3.0"
|
||||
|
||||
[[DataStructures]]
|
||||
deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
|
||||
git-tree-sha1 = "7d9d316f04214f7efdbb6398d545446e246eff02"
|
||||
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
|
||||
version = "0.18.10"
|
||||
|
||||
[[Dates]]
|
||||
deps = ["Printf"]
|
||||
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
|
||||
|
||||
[[DelimitedFiles]]
|
||||
deps = ["Mmap"]
|
||||
uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
|
||||
|
||||
[[Distributed]]
|
||||
deps = ["Random", "Serialization", "Sockets"]
|
||||
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
|
||||
|
||||
[[DocStringExtensions]]
|
||||
deps = ["LibGit2"]
|
||||
git-tree-sha1 = "a32185f5428d3986f47c2ab78b1f216d5e6cc96f"
|
||||
uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
|
||||
version = "0.8.5"
|
||||
|
||||
[[Downloads]]
|
||||
deps = ["ArgTools", "LibCURL", "NetworkOptions"]
|
||||
uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
|
||||
|
||||
[[Elfutils_jll]]
|
||||
deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "XZ_jll", "Zlib_jll", "argp_standalone_jll", "fts_jll", "obstack_jll"]
|
||||
git-tree-sha1 = "8f9fcde6d89b0a3ca51cb2028beab462705c5436"
|
||||
uuid = "ab5a07f8-06af-567f-a878-e8bb879eba5a"
|
||||
version = "0.182.0+0"
|
||||
|
||||
[[ExprTools]]
|
||||
git-tree-sha1 = "b7e3d17636b348f005f11040025ae8c6f645fe92"
|
||||
uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
|
||||
version = "0.1.6"
|
||||
|
||||
[[Future]]
|
||||
deps = ["Random"]
|
||||
uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
|
||||
|
||||
[[GPUArrays]]
|
||||
deps = ["AbstractFFTs", "Adapt", "LinearAlgebra", "Printf", "Random", "Serialization", "Statistics"]
|
||||
git-tree-sha1 = "ececbf05f8904c92814bdbd0aafd5540b0bf2e9a"
|
||||
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
|
||||
version = "7.0.1"
|
||||
|
||||
[[GPUCompiler]]
|
||||
deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"]
|
||||
git-tree-sha1 = "4ed2616d5e656c8716736b64da86755467f26cf5"
|
||||
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
|
||||
version = "0.12.9"
|
||||
|
||||
[[HIP_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "ROCmCompilerSupport_jll", "ROCmDeviceLibs_jll", "ROCmOpenCLRuntime_jll", "hsa_rocr_jll"]
|
||||
git-tree-sha1 = "5097d8f7b6842156ab0928371b3d03fefd8decab"
|
||||
uuid = "2696aab5-0948-5276-aa9a-2a86a37016b8"
|
||||
version = "4.0.0+1"
|
||||
|
||||
[[InteractiveUtils]]
|
||||
deps = ["Markdown"]
|
||||
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
|
||||
|
||||
[[IrrationalConstants]]
|
||||
git-tree-sha1 = "f76424439413893a832026ca355fe273e93bce94"
|
||||
uuid = "92d709cd-6900-40b7-9082-c6be49f344b6"
|
||||
version = "0.1.0"
|
||||
|
||||
[[JLLWrappers]]
|
||||
deps = ["Preferences"]
|
||||
git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
|
||||
uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
|
||||
version = "1.3.0"
|
||||
|
||||
[[KernelAbstractions]]
|
||||
deps = ["Adapt", "Cassette", "InteractiveUtils", "MacroTools", "SpecialFunctions", "StaticArrays", "UUIDs"]
|
||||
git-tree-sha1 = "5e6c70389c1b1e40adb81664ca8cea6ce8127afc"
|
||||
uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
|
||||
version = "0.7.0"
|
||||
|
||||
[[LLVM]]
|
||||
deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"]
|
||||
git-tree-sha1 = "23a47d417a3cd9c2e73c854bac7dd4731c105ef7"
|
||||
uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
|
||||
version = "4.4.0"
|
||||
|
||||
[[LLVMExtra_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "9c360e5ce980b88bb31a7b086dbb19469008154b"
|
||||
uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab"
|
||||
version = "0.0.10+0"
|
||||
|
||||
[[LazyArtifacts]]
|
||||
deps = ["Artifacts", "Pkg"]
|
||||
uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
|
||||
|
||||
[[LibCURL]]
|
||||
deps = ["LibCURL_jll", "MozillaCACerts_jll"]
|
||||
uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
|
||||
|
||||
[[LibCURL_jll]]
|
||||
deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
|
||||
uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
|
||||
|
||||
[[LibGit2]]
|
||||
deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
|
||||
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
|
||||
|
||||
[[LibSSH2_jll]]
|
||||
deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
|
||||
uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
|
||||
|
||||
[[Libdl]]
|
||||
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
|
||||
|
||||
[[Libgcrypt_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgpg_error_jll", "Pkg"]
|
||||
git-tree-sha1 = "64613c82a59c120435c067c2b809fc61cf5166ae"
|
||||
uuid = "d4300ac3-e22c-5743-9152-c294e39db1e4"
|
||||
version = "1.8.7+0"
|
||||
|
||||
[[Libglvnd_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll"]
|
||||
git-tree-sha1 = "7739f837d6447403596a75d19ed01fd08d6f56bf"
|
||||
uuid = "7e76a0d4-f3c7-5321-8279-8d96eeed0f29"
|
||||
version = "1.3.0+3"
|
||||
|
||||
[[Libgpg_error_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "c333716e46366857753e273ce6a69ee0945a6db9"
|
||||
uuid = "7add5ba3-2f88-524e-9cd5-f83b8a55f7b8"
|
||||
version = "1.42.0+0"
|
||||
|
||||
[[Libiconv_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "42b62845d70a619f063a7da093d995ec8e15e778"
|
||||
uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531"
|
||||
version = "1.16.1+1"
|
||||
|
||||
[[LinearAlgebra]]
|
||||
deps = ["Libdl"]
|
||||
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
|
||||
|
||||
[[LogExpFunctions]]
|
||||
deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"]
|
||||
git-tree-sha1 = "3d682c07e6dd250ed082f883dc88aee7996bf2cc"
|
||||
uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
|
||||
version = "0.3.0"
|
||||
|
||||
[[Logging]]
|
||||
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
|
||||
|
||||
[[MacroTools]]
|
||||
deps = ["Markdown", "Random"]
|
||||
git-tree-sha1 = "0fb723cd8c45858c22169b2e42269e53271a6df7"
|
||||
uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
|
||||
version = "0.5.7"
|
||||
|
||||
[[Markdown]]
|
||||
deps = ["Base64"]
|
||||
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
|
||||
|
||||
[[MbedTLS_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
|
||||
|
||||
[[Mmap]]
|
||||
uuid = "a63ad114-7e13-5084-954f-fe012c677804"
|
||||
|
||||
[[MozillaCACerts_jll]]
|
||||
uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
|
||||
|
||||
[[NUMA_jll]]
|
||||
deps = ["Libdl", "Pkg"]
|
||||
git-tree-sha1 = "778f9bd14400cff2c32ed357e12766ac0e3d766e"
|
||||
uuid = "7f51dc2b-bb24-59f8-b771-bb1490e4195d"
|
||||
version = "2.0.13+1"
|
||||
|
||||
[[NetworkOptions]]
|
||||
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
|
||||
|
||||
[[OpenSpecFun_jll]]
|
||||
deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1"
|
||||
uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
|
||||
version = "0.5.5+0"
|
||||
|
||||
[[OrderedCollections]]
|
||||
git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
|
||||
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
|
||||
version = "1.4.1"
|
||||
|
||||
[[Parameters]]
|
||||
deps = ["OrderedCollections", "UnPack"]
|
||||
git-tree-sha1 = "2276ac65f1e236e0a6ea70baff3f62ad4c625345"
|
||||
uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
|
||||
version = "0.12.2"
|
||||
|
||||
[[Pkg]]
|
||||
deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
|
||||
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
|
||||
|
||||
[[Preferences]]
|
||||
deps = ["TOML"]
|
||||
git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a"
|
||||
uuid = "21216c6a-2e73-6563-6e65-726566657250"
|
||||
version = "1.2.2"
|
||||
|
||||
[[Printf]]
|
||||
deps = ["Unicode"]
|
||||
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
|
||||
|
||||
[[REPL]]
|
||||
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
|
||||
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
|
||||
|
||||
[[ROCKernels]]
|
||||
deps = ["AMDGPU", "Adapt", "Cassette", "KernelAbstractions", "SpecialFunctions", "StaticArrays"]
|
||||
git-tree-sha1 = "41105b861342637dde17797bdd9aaa537aca646b"
|
||||
uuid = "7eb9e9f0-4bd3-4c4c-8bef-26bd9629d9b9"
|
||||
version = "0.2.0"
|
||||
|
||||
[[ROCmCompilerSupport_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "ROCmDeviceLibs_jll", "hsa_rocr_jll"]
|
||||
git-tree-sha1 = "56ddcfb5d8b60c9f8c1bc619886f8d363fd1926d"
|
||||
uuid = "8fbdd1d2-db62-5cd0-981e-905da1486e17"
|
||||
version = "4.0.0+1"
|
||||
|
||||
[[ROCmDeviceLibs_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"]
|
||||
git-tree-sha1 = "d764f0f28b5af89aa004871a6a38e5d061f77257"
|
||||
uuid = "873c0968-716b-5aa7-bb8d-d1e2e2aeff2d"
|
||||
version = "4.0.0+0"
|
||||
|
||||
[[ROCmOpenCLRuntime_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pkg", "ROCmCompilerSupport_jll", "ROCmDeviceLibs_jll", "Xorg_libX11_jll", "Xorg_xorgproto_jll", "hsa_rocr_jll"]
|
||||
git-tree-sha1 = "f9e3e2cb40a7990535efa7da9b9dd0e0b458a973"
|
||||
uuid = "10ae2a08-2eea-53f8-8c20-eec175020e9f"
|
||||
version = "4.0.0+1"
|
||||
|
||||
[[Random]]
|
||||
deps = ["Serialization"]
|
||||
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
|
||||
|
||||
[[Random123]]
|
||||
deps = ["Libdl", "Random", "RandomNumbers"]
|
||||
git-tree-sha1 = "0e8b146557ad1c6deb1367655e052276690e71a3"
|
||||
uuid = "74087812-796a-5b5d-8853-05524746bad3"
|
||||
version = "1.4.2"
|
||||
|
||||
[[RandomNumbers]]
|
||||
deps = ["Random", "Requires"]
|
||||
git-tree-sha1 = "043da614cc7e95c703498a491e2c21f58a2b8111"
|
||||
uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143"
|
||||
version = "1.5.3"
|
||||
|
||||
[[Reexport]]
|
||||
git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b"
|
||||
uuid = "189a3867-3050-52da-a836-e630ba90ab69"
|
||||
version = "1.2.2"
|
||||
|
||||
[[Requires]]
|
||||
deps = ["UUIDs"]
|
||||
git-tree-sha1 = "4036a3bd08ac7e968e27c203d45f5fff15020621"
|
||||
uuid = "ae029012-a4dd-5104-9daa-d747884805df"
|
||||
version = "1.1.3"
|
||||
|
||||
[[SHA]]
|
||||
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
|
||||
|
||||
[[Serialization]]
|
||||
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
|
||||
|
||||
[[Setfield]]
|
||||
deps = ["ConstructionBase", "Future", "MacroTools", "Requires"]
|
||||
git-tree-sha1 = "fca29e68c5062722b5b4435594c3d1ba557072a3"
|
||||
uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46"
|
||||
version = "0.7.1"
|
||||
|
||||
[[SharedArrays]]
|
||||
deps = ["Distributed", "Mmap", "Random", "Serialization"]
|
||||
uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
|
||||
|
||||
[[Sockets]]
|
||||
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
|
||||
|
||||
[[SparseArrays]]
|
||||
deps = ["LinearAlgebra", "Random"]
|
||||
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
|
||||
|
||||
[[SpecialFunctions]]
|
||||
deps = ["ChainRulesCore", "LogExpFunctions", "OpenSpecFun_jll"]
|
||||
git-tree-sha1 = "a322a9493e49c5f3a10b50df3aedaf1cdb3244b7"
|
||||
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
|
||||
version = "1.6.1"
|
||||
|
||||
[[StaticArrays]]
|
||||
deps = ["LinearAlgebra", "Random", "Statistics"]
|
||||
git-tree-sha1 = "3240808c6d463ac46f1c1cd7638375cd22abbccb"
|
||||
uuid = "90137ffa-7385-5640-81b9-e52037218182"
|
||||
version = "1.2.12"
|
||||
|
||||
[[Statistics]]
|
||||
deps = ["LinearAlgebra", "SparseArrays"]
|
||||
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
|
||||
|
||||
[[TOML]]
|
||||
deps = ["Dates"]
|
||||
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
|
||||
|
||||
[[Tar]]
|
||||
deps = ["ArgTools", "SHA"]
|
||||
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
|
||||
|
||||
[[Test]]
|
||||
deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
|
||||
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
||||
|
||||
[[TextWrap]]
|
||||
git-tree-sha1 = "9250ef9b01b66667380cf3275b3f7488d0e25faf"
|
||||
uuid = "b718987f-49a8-5099-9789-dcd902bef87d"
|
||||
version = "1.0.1"
|
||||
|
||||
[[TimerOutputs]]
|
||||
deps = ["ExprTools", "Printf"]
|
||||
git-tree-sha1 = "209a8326c4f955e2442c07b56029e88bb48299c7"
|
||||
uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
|
||||
version = "0.5.12"
|
||||
|
||||
[[UUIDs]]
|
||||
deps = ["Random", "SHA"]
|
||||
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
|
||||
|
||||
[[UnPack]]
|
||||
git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b"
|
||||
uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
|
||||
version = "1.0.2"
|
||||
|
||||
[[Unicode]]
|
||||
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
|
||||
|
||||
[[XML2_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"]
|
||||
git-tree-sha1 = "1acf5bdf07aa0907e0a37d3718bb88d4b687b74a"
|
||||
uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a"
|
||||
version = "2.9.12+0"
|
||||
|
||||
[[XSLT_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgcrypt_jll", "Libgpg_error_jll", "Libiconv_jll", "Pkg", "XML2_jll", "Zlib_jll"]
|
||||
git-tree-sha1 = "91844873c4085240b95e795f692c4cec4d805f8a"
|
||||
uuid = "aed1982a-8fda-507f-9586-7b0439959a61"
|
||||
version = "1.1.34+0"
|
||||
|
||||
[[XZ_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "a921669cd9a45c23031fd4eb904f5cc3d20de415"
|
||||
uuid = "ffd25f8a-64ca-5728-b0f7-c24cf3aae800"
|
||||
version = "5.2.5+2"
|
||||
|
||||
[[Xorg_libX11_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll", "Xorg_xtrans_jll"]
|
||||
git-tree-sha1 = "5be649d550f3f4b95308bf0183b82e2582876527"
|
||||
uuid = "4f6342f7-b3d2-589e-9d20-edeb45f2b2bc"
|
||||
version = "1.6.9+4"
|
||||
|
||||
[[Xorg_libXau_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "4e490d5c960c314f33885790ed410ff3a94ce67e"
|
||||
uuid = "0c0b7dd1-d40b-584c-a123-a41640f87eec"
|
||||
version = "1.0.9+4"
|
||||
|
||||
[[Xorg_libXdmcp_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "4fe47bd2247248125c428978740e18a681372dd4"
|
||||
uuid = "a3789734-cfe1-5b06-b2d0-1dd0d9d62d05"
|
||||
version = "1.1.3+4"
|
||||
|
||||
[[Xorg_libXext_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"]
|
||||
git-tree-sha1 = "b7c0aa8c376b31e4852b360222848637f481f8c3"
|
||||
uuid = "1082639a-0dae-5f34-9b06-72781eeb8cb3"
|
||||
version = "1.3.4+4"
|
||||
|
||||
[[Xorg_libpthread_stubs_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "6783737e45d3c59a4a4c4091f5f88cdcf0908cbb"
|
||||
uuid = "14d82f49-176c-5ed1-bb49-ad3f5cbd8c74"
|
||||
version = "0.1.0+3"
|
||||
|
||||
[[Xorg_libxcb_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "XSLT_jll", "Xorg_libXau_jll", "Xorg_libXdmcp_jll", "Xorg_libpthread_stubs_jll"]
|
||||
git-tree-sha1 = "daf17f441228e7a3833846cd048892861cff16d6"
|
||||
uuid = "c7cfdc94-dc32-55de-ac96-5a1b8d977c5b"
|
||||
version = "1.13.0+3"
|
||||
|
||||
[[Xorg_xorgproto_jll]]
|
||||
deps = ["Libdl", "Pkg"]
|
||||
git-tree-sha1 = "9a9eb8ce756fe0bca01b4be16da770e18d264972"
|
||||
uuid = "c4d99508-4286-5418-9131-c86396af500b"
|
||||
version = "2019.2.0+2"
|
||||
|
||||
[[Xorg_xtrans_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "79c31e7844f6ecf779705fbc12146eb190b7d845"
|
||||
uuid = "c5fb5394-a638-5e4d-96e5-b29de1b5cf10"
|
||||
version = "1.4.0+3"
|
||||
|
||||
[[Zlib_jll]]
|
||||
deps = ["Libdl"]
|
||||
uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
|
||||
|
||||
[[argp_standalone_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "feaf9f6293003c2bf53056fd6930d677ed340b34"
|
||||
uuid = "c53206cc-00f7-50bf-ad1e-3ae1f6e49bc3"
|
||||
version = "1.3.1+0"
|
||||
|
||||
[[fts_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "78732b942383d2cb521df8a1a0814911144e663d"
|
||||
uuid = "d65627f6-89bd-53e8-8ab5-8b75ff535eee"
|
||||
version = "1.2.7+1"
|
||||
|
||||
[[hsa_rocr_jll]]
|
||||
deps = ["Artifacts", "Elfutils_jll", "JLLWrappers", "Libdl", "NUMA_jll", "Pkg", "Zlib_jll", "hsakmt_roct_jll"]
|
||||
git-tree-sha1 = "df8d73efec8b1e53ad527d208f5343c0368f0fcd"
|
||||
uuid = "dd59ff1a-a01a-568d-8b29-0669330f116a"
|
||||
version = "4.0.0+0"
|
||||
|
||||
[[hsakmt_roct_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "NUMA_jll", "Pkg"]
|
||||
git-tree-sha1 = "80e0c9940e15cfd6f1f1e9d9f3953ec4d48d3d4a"
|
||||
uuid = "1cecccd7-a9b6-5045-9cdc-a44c19b16d76"
|
||||
version = "4.0.0+0"
|
||||
|
||||
[[nghttp2_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
|
||||
|
||||
[[obstack_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "1c4a6b66e934fc6db4649cb2910c72f53bbfea7e"
|
||||
uuid = "c88a4935-d25e-5644-aacc-5db6f1b8ef79"
|
||||
version = "1.2.2+0"
|
||||
|
||||
[[p7zip_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
|
||||
11
JuliaStream.jl/KernelAbstractions/Project.toml
Normal file
11
JuliaStream.jl/KernelAbstractions/Project.toml
Normal file
@ -0,0 +1,11 @@
|
||||
[deps]
|
||||
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
|
||||
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
|
||||
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
|
||||
CUDAKernels = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
|
||||
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
|
||||
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
|
||||
ROCKernels = "7eb9e9f0-4bd3-4c4c-8bef-26bd9629d9b9"
|
||||
|
||||
[compat]
|
||||
julia = "1.6"
|
||||
493
JuliaStream.jl/Manifest.toml
Normal file
493
JuliaStream.jl/Manifest.toml
Normal file
@ -0,0 +1,493 @@
|
||||
# This file is machine-generated - editing it directly is not advised
|
||||
|
||||
[[AMDGPU]]
|
||||
deps = ["AbstractFFTs", "Adapt", "BinaryProvider", "CEnum", "GPUArrays", "GPUCompiler", "LLVM", "Libdl", "LinearAlgebra", "MacroTools", "Printf", "Random", "Requires", "Setfield", "hsa_rocr_jll", "hsakmt_roct_jll"]
|
||||
git-tree-sha1 = "04fdb3923ac6f55fa7347dce0f0f6f10e321e2e9"
|
||||
uuid = "21141c5a-9bdb-4563-92ae-f87d6854732e"
|
||||
version = "0.2.7"
|
||||
|
||||
[[AbstractFFTs]]
|
||||
deps = ["LinearAlgebra"]
|
||||
git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0"
|
||||
uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
|
||||
version = "1.0.1"
|
||||
|
||||
[[Adapt]]
|
||||
deps = ["LinearAlgebra"]
|
||||
git-tree-sha1 = "84918055d15b3114ede17ac6a7182f68870c16f7"
|
||||
uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
|
||||
version = "3.3.1"
|
||||
|
||||
[[ArgParse]]
|
||||
deps = ["Logging", "TextWrap"]
|
||||
git-tree-sha1 = "3102bce13da501c9104df33549f511cd25264d7d"
|
||||
uuid = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
|
||||
version = "1.1.4"
|
||||
|
||||
[[ArgTools]]
|
||||
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
|
||||
|
||||
[[Artifacts]]
|
||||
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
|
||||
|
||||
[[BFloat16s]]
|
||||
deps = ["LinearAlgebra", "Test"]
|
||||
git-tree-sha1 = "4af69e205efc343068dc8722b8dfec1ade89254a"
|
||||
uuid = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
|
||||
version = "0.1.0"
|
||||
|
||||
[[Base64]]
|
||||
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
|
||||
|
||||
[[BinaryProvider]]
|
||||
deps = ["Libdl", "Logging", "SHA"]
|
||||
git-tree-sha1 = "ecdec412a9abc8db54c0efc5548c64dfce072058"
|
||||
uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
|
||||
version = "0.5.10"
|
||||
|
||||
[[Bzip2_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "19a35467a82e236ff51bc17a3a44b69ef35185a2"
|
||||
uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0"
|
||||
version = "1.0.8+0"
|
||||
|
||||
[[CEnum]]
|
||||
git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9"
|
||||
uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
|
||||
version = "0.4.1"
|
||||
|
||||
[[CUDA]]
|
||||
deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CompilerSupportLibraries_jll", "DataStructures", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "MacroTools", "Memoize", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "SpecialFunctions", "TimerOutputs"]
|
||||
git-tree-sha1 = "364179416eabc34c9ca32126a6bdb431680c3bad"
|
||||
uuid = "052768ef-5323-5732-b1bb-66c8b64840ba"
|
||||
version = "3.2.1"
|
||||
|
||||
[[CUDAKernels]]
|
||||
deps = ["Adapt", "CUDA", "Cassette", "KernelAbstractions", "SpecialFunctions", "StaticArrays"]
|
||||
git-tree-sha1 = "81f76297b63c67723b1d60f5e7e002ae3393974b"
|
||||
uuid = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
|
||||
version = "0.3.0"
|
||||
|
||||
[[Cassette]]
|
||||
git-tree-sha1 = "b4b1d61ebbae2bc69a45e3a6b8439b4e411bc131"
|
||||
uuid = "7057c7e9-c182-5462-911a-8362d720325c"
|
||||
version = "0.3.8"
|
||||
|
||||
[[ChainRulesCore]]
|
||||
deps = ["Compat", "LinearAlgebra", "SparseArrays"]
|
||||
git-tree-sha1 = "bdc0937269321858ab2a4f288486cb258b9a0af7"
|
||||
uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
|
||||
version = "1.3.0"
|
||||
|
||||
[[Compat]]
|
||||
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
|
||||
git-tree-sha1 = "727e463cfebd0c7b999bbf3e9e7e16f254b94193"
|
||||
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
|
||||
version = "3.34.0"
|
||||
|
||||
[[CompilerSupportLibraries_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
|
||||
|
||||
[[ConstructionBase]]
|
||||
deps = ["LinearAlgebra"]
|
||||
git-tree-sha1 = "f74e9d5388b8620b4cee35d4c5a618dd4dc547f4"
|
||||
uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9"
|
||||
version = "1.3.0"
|
||||
|
||||
[[DataStructures]]
|
||||
deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
|
||||
git-tree-sha1 = "7d9d316f04214f7efdbb6398d545446e246eff02"
|
||||
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
|
||||
version = "0.18.10"
|
||||
|
||||
[[Dates]]
|
||||
deps = ["Printf"]
|
||||
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
|
||||
|
||||
[[DelimitedFiles]]
|
||||
deps = ["Mmap"]
|
||||
uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
|
||||
|
||||
[[Distributed]]
|
||||
deps = ["Random", "Serialization", "Sockets"]
|
||||
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
|
||||
|
||||
[[DocStringExtensions]]
|
||||
deps = ["LibGit2"]
|
||||
git-tree-sha1 = "a32185f5428d3986f47c2ab78b1f216d5e6cc96f"
|
||||
uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
|
||||
version = "0.8.5"
|
||||
|
||||
[[Downloads]]
|
||||
deps = ["ArgTools", "LibCURL", "NetworkOptions"]
|
||||
uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
|
||||
|
||||
[[Elfutils_jll]]
|
||||
deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "XZ_jll", "Zlib_jll", "argp_standalone_jll", "fts_jll", "obstack_jll"]
|
||||
git-tree-sha1 = "8f9fcde6d89b0a3ca51cb2028beab462705c5436"
|
||||
uuid = "ab5a07f8-06af-567f-a878-e8bb879eba5a"
|
||||
version = "0.182.0+0"
|
||||
|
||||
[[ExprTools]]
|
||||
git-tree-sha1 = "b7e3d17636b348f005f11040025ae8c6f645fe92"
|
||||
uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
|
||||
version = "0.1.6"
|
||||
|
||||
[[Future]]
|
||||
deps = ["Random"]
|
||||
uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
|
||||
|
||||
[[GPUArrays]]
|
||||
deps = ["AbstractFFTs", "Adapt", "LinearAlgebra", "Printf", "Random", "Serialization", "Statistics"]
|
||||
git-tree-sha1 = "df5b8569904c5c10e84c640984cfff054b18c086"
|
||||
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
|
||||
version = "6.4.1"
|
||||
|
||||
[[GPUCompiler]]
|
||||
deps = ["DataStructures", "ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "Scratch", "Serialization", "TimerOutputs", "UUIDs"]
|
||||
git-tree-sha1 = "42d635f6d87af125b86288df3819f805fb4d851a"
|
||||
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
|
||||
version = "0.11.5"
|
||||
|
||||
[[InteractiveUtils]]
|
||||
deps = ["Markdown"]
|
||||
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
|
||||
|
||||
[[IrrationalConstants]]
|
||||
git-tree-sha1 = "f76424439413893a832026ca355fe273e93bce94"
|
||||
uuid = "92d709cd-6900-40b7-9082-c6be49f344b6"
|
||||
version = "0.1.0"
|
||||
|
||||
[[JLLWrappers]]
|
||||
deps = ["Preferences"]
|
||||
git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
|
||||
uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
|
||||
version = "1.3.0"
|
||||
|
||||
[[KernelAbstractions]]
|
||||
deps = ["Adapt", "Cassette", "InteractiveUtils", "MacroTools", "SpecialFunctions", "StaticArrays", "UUIDs"]
|
||||
git-tree-sha1 = "5e6c70389c1b1e40adb81664ca8cea6ce8127afc"
|
||||
uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
|
||||
version = "0.7.0"
|
||||
|
||||
[[LLVM]]
|
||||
deps = ["CEnum", "Libdl", "Printf", "Unicode"]
|
||||
git-tree-sha1 = "f57ac3fd2045b50d3db081663837ac5b4096947e"
|
||||
uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
|
||||
version = "3.9.0"
|
||||
|
||||
[[LazyArtifacts]]
|
||||
deps = ["Artifacts", "Pkg"]
|
||||
uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
|
||||
|
||||
[[LibCURL]]
|
||||
deps = ["LibCURL_jll", "MozillaCACerts_jll"]
|
||||
uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
|
||||
|
||||
[[LibCURL_jll]]
|
||||
deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
|
||||
uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
|
||||
|
||||
[[LibGit2]]
|
||||
deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
|
||||
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
|
||||
|
||||
[[LibSSH2_jll]]
|
||||
deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
|
||||
uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
|
||||
|
||||
[[Libdl]]
|
||||
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
|
||||
|
||||
[[LinearAlgebra]]
|
||||
deps = ["Libdl"]
|
||||
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
|
||||
|
||||
[[LogExpFunctions]]
|
||||
deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"]
|
||||
git-tree-sha1 = "3d682c07e6dd250ed082f883dc88aee7996bf2cc"
|
||||
uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
|
||||
version = "0.3.0"
|
||||
|
||||
[[Logging]]
|
||||
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
|
||||
|
||||
[[MacroTools]]
|
||||
deps = ["Markdown", "Random"]
|
||||
git-tree-sha1 = "0fb723cd8c45858c22169b2e42269e53271a6df7"
|
||||
uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
|
||||
version = "0.5.7"
|
||||
|
||||
[[Markdown]]
|
||||
deps = ["Base64"]
|
||||
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
|
||||
|
||||
[[MbedTLS_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
|
||||
|
||||
[[Memoize]]
|
||||
deps = ["MacroTools"]
|
||||
git-tree-sha1 = "2b1dfcba103de714d31c033b5dacc2e4a12c7caa"
|
||||
uuid = "c03570c3-d221-55d1-a50c-7939bbd78826"
|
||||
version = "0.4.4"
|
||||
|
||||
[[Mmap]]
|
||||
uuid = "a63ad114-7e13-5084-954f-fe012c677804"
|
||||
|
||||
[[MozillaCACerts_jll]]
|
||||
uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
|
||||
|
||||
[[NEO_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "gmmlib_jll", "libigc_jll", "oneAPI_Level_Zero_Headers_jll"]
|
||||
git-tree-sha1 = "c753dd029eb0837658bf8eaee041c19e4ce5bb8c"
|
||||
uuid = "700fe977-ac61-5f37-bbc8-c6c4b2b6a9fd"
|
||||
version = "21.12.19358+0"
|
||||
|
||||
[[NUMA_jll]]
|
||||
deps = ["Libdl", "Pkg"]
|
||||
git-tree-sha1 = "778f9bd14400cff2c32ed357e12766ac0e3d766e"
|
||||
uuid = "7f51dc2b-bb24-59f8-b771-bb1490e4195d"
|
||||
version = "2.0.13+1"
|
||||
|
||||
[[NetworkOptions]]
|
||||
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
|
||||
|
||||
[[OpenSpecFun_jll]]
|
||||
deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1"
|
||||
uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
|
||||
version = "0.5.5+0"
|
||||
|
||||
[[OrderedCollections]]
|
||||
git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
|
||||
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
|
||||
version = "1.4.1"
|
||||
|
||||
[[Parameters]]
|
||||
deps = ["OrderedCollections", "UnPack"]
|
||||
git-tree-sha1 = "2276ac65f1e236e0a6ea70baff3f62ad4c625345"
|
||||
uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
|
||||
version = "0.12.2"
|
||||
|
||||
[[Pkg]]
|
||||
deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
|
||||
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
|
||||
|
||||
[[Preferences]]
|
||||
deps = ["TOML"]
|
||||
git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a"
|
||||
uuid = "21216c6a-2e73-6563-6e65-726566657250"
|
||||
version = "1.2.2"
|
||||
|
||||
[[Printf]]
|
||||
deps = ["Unicode"]
|
||||
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
|
||||
|
||||
[[REPL]]
|
||||
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
|
||||
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
|
||||
|
||||
[[ROCKernels]]
|
||||
deps = ["AMDGPU", "Adapt", "Cassette", "KernelAbstractions", "SpecialFunctions", "StaticArrays"]
|
||||
git-tree-sha1 = "41105b861342637dde17797bdd9aaa537aca646b"
|
||||
uuid = "7eb9e9f0-4bd3-4c4c-8bef-26bd9629d9b9"
|
||||
version = "0.2.0"
|
||||
|
||||
[[Random]]
|
||||
deps = ["Serialization"]
|
||||
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
|
||||
|
||||
[[Random123]]
|
||||
deps = ["Libdl", "Random", "RandomNumbers"]
|
||||
git-tree-sha1 = "0e8b146557ad1c6deb1367655e052276690e71a3"
|
||||
uuid = "74087812-796a-5b5d-8853-05524746bad3"
|
||||
version = "1.4.2"
|
||||
|
||||
[[RandomNumbers]]
|
||||
deps = ["Random", "Requires"]
|
||||
git-tree-sha1 = "043da614cc7e95c703498a491e2c21f58a2b8111"
|
||||
uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143"
|
||||
version = "1.5.3"
|
||||
|
||||
[[Reexport]]
|
||||
git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b"
|
||||
uuid = "189a3867-3050-52da-a836-e630ba90ab69"
|
||||
version = "1.2.2"
|
||||
|
||||
[[Requires]]
|
||||
deps = ["UUIDs"]
|
||||
git-tree-sha1 = "4036a3bd08ac7e968e27c203d45f5fff15020621"
|
||||
uuid = "ae029012-a4dd-5104-9daa-d747884805df"
|
||||
version = "1.1.3"
|
||||
|
||||
[[SHA]]
|
||||
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
|
||||
|
||||
[[SPIRV_LLVM_Translator_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "8cca87d57f6ddf19373cc9791fddc741406c8fbf"
|
||||
uuid = "4a5d46fc-d8cf-5151-a261-86b458210efb"
|
||||
version = "11.0.0+2"
|
||||
|
||||
[[SPIRV_Tools_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "c0324b7e07bc4649f755bfe7e00f7c6ed6aa353f"
|
||||
uuid = "6ac6d60f-d740-5983-97d7-a4482c0689f4"
|
||||
version = "2021.2.0+0"
|
||||
|
||||
[[Scratch]]
|
||||
deps = ["Dates"]
|
||||
git-tree-sha1 = "0b4b7f1393cff97c33891da2a0bf69c6ed241fda"
|
||||
uuid = "6c6a2e73-6563-6170-7368-637461726353"
|
||||
version = "1.1.0"
|
||||
|
||||
[[Serialization]]
|
||||
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
|
||||
|
||||
[[Setfield]]
|
||||
deps = ["ConstructionBase", "Future", "MacroTools", "Requires"]
|
||||
git-tree-sha1 = "fca29e68c5062722b5b4435594c3d1ba557072a3"
|
||||
uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46"
|
||||
version = "0.7.1"
|
||||
|
||||
[[SharedArrays]]
|
||||
deps = ["Distributed", "Mmap", "Random", "Serialization"]
|
||||
uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
|
||||
|
||||
[[Sockets]]
|
||||
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
|
||||
|
||||
[[SparseArrays]]
|
||||
deps = ["LinearAlgebra", "Random"]
|
||||
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
|
||||
|
||||
[[SpecialFunctions]]
|
||||
deps = ["ChainRulesCore", "LogExpFunctions", "OpenSpecFun_jll"]
|
||||
git-tree-sha1 = "a322a9493e49c5f3a10b50df3aedaf1cdb3244b7"
|
||||
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
|
||||
version = "1.6.1"
|
||||
|
||||
[[StaticArrays]]
|
||||
deps = ["LinearAlgebra", "Random", "Statistics"]
|
||||
git-tree-sha1 = "3240808c6d463ac46f1c1cd7638375cd22abbccb"
|
||||
uuid = "90137ffa-7385-5640-81b9-e52037218182"
|
||||
version = "1.2.12"
|
||||
|
||||
[[Statistics]]
|
||||
deps = ["LinearAlgebra", "SparseArrays"]
|
||||
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
|
||||
|
||||
[[TOML]]
|
||||
deps = ["Dates"]
|
||||
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
|
||||
|
||||
[[Tar]]
|
||||
deps = ["ArgTools", "SHA"]
|
||||
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
|
||||
|
||||
[[Test]]
|
||||
deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
|
||||
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
||||
|
||||
[[TextWrap]]
|
||||
git-tree-sha1 = "9250ef9b01b66667380cf3275b3f7488d0e25faf"
|
||||
uuid = "b718987f-49a8-5099-9789-dcd902bef87d"
|
||||
version = "1.0.1"
|
||||
|
||||
[[TimerOutputs]]
|
||||
deps = ["ExprTools", "Printf"]
|
||||
git-tree-sha1 = "209a8326c4f955e2442c07b56029e88bb48299c7"
|
||||
uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
|
||||
version = "0.5.12"
|
||||
|
||||
[[UUIDs]]
|
||||
deps = ["Random", "SHA"]
|
||||
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
|
||||
|
||||
[[UnPack]]
|
||||
git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b"
|
||||
uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
|
||||
version = "1.0.2"
|
||||
|
||||
[[Unicode]]
|
||||
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
|
||||
|
||||
[[XZ_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "a921669cd9a45c23031fd4eb904f5cc3d20de415"
|
||||
uuid = "ffd25f8a-64ca-5728-b0f7-c24cf3aae800"
|
||||
version = "5.2.5+2"
|
||||
|
||||
[[Zlib_jll]]
|
||||
deps = ["Libdl"]
|
||||
uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
|
||||
|
||||
[[argp_standalone_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "feaf9f6293003c2bf53056fd6930d677ed340b34"
|
||||
uuid = "c53206cc-00f7-50bf-ad1e-3ae1f6e49bc3"
|
||||
version = "1.3.1+0"
|
||||
|
||||
[[fts_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "78732b942383d2cb521df8a1a0814911144e663d"
|
||||
uuid = "d65627f6-89bd-53e8-8ab5-8b75ff535eee"
|
||||
version = "1.2.7+1"
|
||||
|
||||
[[gmmlib_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "4067ef455d4fa67febe26efc3f9565a9bb7ba911"
|
||||
uuid = "09858cae-167c-5acb-9302-fddc6874d481"
|
||||
version = "20.3.2+0"
|
||||
|
||||
[[hsa_rocr_jll]]
|
||||
deps = ["Artifacts", "Elfutils_jll", "JLLWrappers", "Libdl", "NUMA_jll", "Pkg", "Zlib_jll", "hsakmt_roct_jll"]
|
||||
git-tree-sha1 = "42189f176d6ae4f37c0c0e652fec339bb0bfab5d"
|
||||
uuid = "dd59ff1a-a01a-568d-8b29-0669330f116a"
|
||||
version = "3.7.0+1"
|
||||
|
||||
[[hsakmt_roct_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "NUMA_jll", "Pkg"]
|
||||
git-tree-sha1 = "8a9ee6c091e952e4ea6585d15131d43f789ae041"
|
||||
uuid = "1cecccd7-a9b6-5045-9cdc-a44c19b16d76"
|
||||
version = "3.8.0+0"
|
||||
|
||||
[[libigc_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "6140dbf267f7ab57fb791b49f2114374218b5c20"
|
||||
uuid = "94295238-5935-5bd7-bb0f-b00942e9bdd5"
|
||||
version = "1.0.6712+0"
|
||||
|
||||
[[nghttp2_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
|
||||
|
||||
[[obstack_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "1c4a6b66e934fc6db4649cb2910c72f53bbfea7e"
|
||||
uuid = "c88a4935-d25e-5644-aacc-5db6f1b8ef79"
|
||||
version = "1.2.2+0"
|
||||
|
||||
[[oneAPI]]
|
||||
deps = ["Adapt", "CEnum", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "LinearAlgebra", "NEO_jll", "Printf", "Random", "SPIRV_LLVM_Translator_jll", "SPIRV_Tools_jll", "SpecialFunctions", "oneAPI_Level_Zero_Loader_jll"]
|
||||
git-tree-sha1 = "b4a4b84c864e75fe885a1643525f0c97ce310dd9"
|
||||
uuid = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
|
||||
version = "0.1.3"
|
||||
|
||||
[[oneAPI_Level_Zero_Headers_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "48982fbfd2f3d0a30d644563dcf96892d252b395"
|
||||
uuid = "f4bc562b-d309-54f8-9efb-476e56f0410d"
|
||||
version = "1.1.2+1"
|
||||
|
||||
[[oneAPI_Level_Zero_Loader_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "oneAPI_Level_Zero_Headers_jll"]
|
||||
git-tree-sha1 = "1fa53dfdd32a732f09c254c86403e1abab653fb2"
|
||||
uuid = "13eca655-d68d-5b81-8367-6d99d727ab01"
|
||||
version = "1.3.6+0"
|
||||
|
||||
[[p7zip_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
|
||||
19
JuliaStream.jl/Project.toml
Normal file
19
JuliaStream.jl/Project.toml
Normal file
@ -0,0 +1,19 @@
|
||||
name = "JuliaStream"
|
||||
uuid = "1bdcc9b7-f5ed-4705-bc7b-be1b748ec681"
|
||||
authors = ["Wei-Chen Lin <wl14928@bristol.ac.uk>"]
|
||||
version = "3.4.0"
|
||||
|
||||
[deps]
|
||||
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
|
||||
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
|
||||
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
|
||||
CUDAKernels = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
|
||||
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
|
||||
ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
|
||||
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
|
||||
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
|
||||
ROCKernels = "7eb9e9f0-4bd3-4c4c-8bef-26bd9629d9b9"
|
||||
oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
|
||||
|
||||
[compat]
|
||||
julia = "1.6"
|
||||
67
JuliaStream.jl/README.md
Normal file
67
JuliaStream.jl/README.md
Normal file
@ -0,0 +1,67 @@
|
||||
JuliaStream.jl
|
||||
==============
|
||||
|
||||
This is an implementation of BabelStream in Julia which contains the following variants:
|
||||
|
||||
* `PlainStream.jl` - Single threaded `for`
|
||||
* `ThreadedStream.jl` - Threaded implementation with `Threads.@threads` macros
|
||||
* `DistributedStream.jl` - Process based parallelism with `@distributed` macros
|
||||
* `CUDAStream.jl` - Direct port of BabelStream's native CUDA implementation using [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl)
|
||||
* `AMDGPUStream.jl` - Direct port of BabelStream's native HIP implementation using [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl)
|
||||
* `oneAPIStream.jl` - Direct port of BabelStream's native SYCL implementation using [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl)
|
||||
* `KernelAbstractionsStream.jl` - Direct port of BabelStream's native CUDA implementation using [KernelAbstractions.jl](https://github.com/JuliaGPU/KernelAbstractions.jl)
|
||||
|
||||
### Build & Run
|
||||
|
||||
Prerequisites
|
||||
|
||||
* Julia >= 1.6
|
||||
|
||||
A set of reduced-dependency projects is available for the following backends and implementations:
|
||||
|
||||
* `AMDGPU` supports:
|
||||
- `AMDGPUStream.jl`
|
||||
* `CUDA` supports:
|
||||
- `CUDAStream.jl`
|
||||
* `oneAPI` supports:
|
||||
- `oneAPIStream.jl`
|
||||
* `KernelAbstractions` supports:
|
||||
- `KernelAbstractionsStream.jl`
|
||||
* `Threaded` supports:
|
||||
- `PlainStream.jl`
|
||||
- `ThreadedStream.jl`
|
||||
- `DistributedStream.jl`
|
||||
|
||||
With Julia on path, run your selected benchmark with:
|
||||
|
||||
```shell
|
||||
> cd JuliaStream.jl
|
||||
> julia --project=<BACKEND> -e 'import Pkg; Pkg.instantiate()' # only required on first run
|
||||
> julia --project=<BACKEND> src/<IMPL>Stream.jl
|
||||
```
|
||||
|
||||
For example, to run the CUDA implementation:
|
||||
|
||||
```shell
|
||||
> cd JuliaStream.jl
|
||||
> julia --project=CUDA -e 'import Pkg; Pkg.instantiate()'
|
||||
> julia --project=CUDA src/CUDAStream.jl
|
||||
```
|
||||
|
||||
**Important:**
|
||||
* Julia is 1-indexed, so N >= 1 in `--device N`.
|
||||
* Thread count for `ThreadedStream` must be set via the `JULIA_NUM_THREADS` environment variable (e.g. `export JULIA_NUM_THREADS=$(nproc)`); otherwise it defaults to 1.
|
||||
* Worker count for `DistributedStream` is set with `-p <N>` as per the [documentation](https://docs.julialang.org/en/v1/manual/distributed-computing).
|
||||
* Certain implementations such as CUDA and AMDGPU will do hardware detection at runtime and may download and/or compile further software packages for the platform.
|
||||
|
||||
***
|
||||
|
||||
Alternatively, the top-level project `Project.toml` contains all dependencies needed to run all implementations in `src`.
|
||||
There may be instances where some packages are locked to an older version because of transitive dependency requirements.
|
||||
|
||||
To run the benchmark using the top-level project, run the benchmark with:
|
||||
```shell
|
||||
> cd JuliaStream.jl
|
||||
> julia --project -e 'import Pkg; Pkg.instantiate()'
|
||||
> julia --project src/<IMPL>Stream.jl
|
||||
```
|
||||
31
JuliaStream.jl/Threaded/Manifest.toml
Normal file
31
JuliaStream.jl/Threaded/Manifest.toml
Normal file
@ -0,0 +1,31 @@
|
||||
# This file is machine-generated - editing it directly is not advised
|
||||
|
||||
[[ArgParse]]
|
||||
deps = ["Logging", "TextWrap"]
|
||||
git-tree-sha1 = "3102bce13da501c9104df33549f511cd25264d7d"
|
||||
uuid = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
|
||||
version = "1.1.4"
|
||||
|
||||
[[Logging]]
|
||||
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
|
||||
|
||||
[[OrderedCollections]]
|
||||
git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
|
||||
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
|
||||
version = "1.4.1"
|
||||
|
||||
[[Parameters]]
|
||||
deps = ["OrderedCollections", "UnPack"]
|
||||
git-tree-sha1 = "2276ac65f1e236e0a6ea70baff3f62ad4c625345"
|
||||
uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
|
||||
version = "0.12.2"
|
||||
|
||||
[[TextWrap]]
|
||||
git-tree-sha1 = "9250ef9b01b66667380cf3275b3f7488d0e25faf"
|
||||
uuid = "b718987f-49a8-5099-9789-dcd902bef87d"
|
||||
version = "1.0.1"
|
||||
|
||||
[[UnPack]]
|
||||
git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b"
|
||||
uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
|
||||
version = "1.0.2"
|
||||
6
JuliaStream.jl/Threaded/Project.toml
Normal file
6
JuliaStream.jl/Threaded/Project.toml
Normal file
@ -0,0 +1,6 @@
|
||||
[deps]
|
||||
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
|
||||
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
|
||||
|
||||
[compat]
|
||||
julia = "1.6"
|
||||
319
JuliaStream.jl/oneAPI/Manifest.toml
Normal file
319
JuliaStream.jl/oneAPI/Manifest.toml
Normal file
@ -0,0 +1,319 @@
|
||||
# This file is machine-generated - editing it directly is not advised
|
||||
|
||||
[[Adapt]]
|
||||
deps = ["LinearAlgebra"]
|
||||
git-tree-sha1 = "84918055d15b3114ede17ac6a7182f68870c16f7"
|
||||
uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
|
||||
version = "3.3.1"
|
||||
|
||||
[[ArgParse]]
|
||||
deps = ["Logging", "TextWrap"]
|
||||
git-tree-sha1 = "3102bce13da501c9104df33549f511cd25264d7d"
|
||||
uuid = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
|
||||
version = "1.1.4"
|
||||
|
||||
[[ArgTools]]
|
||||
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
|
||||
|
||||
[[Artifacts]]
|
||||
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
|
||||
|
||||
[[Base64]]
|
||||
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
|
||||
|
||||
[[CEnum]]
|
||||
git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9"
|
||||
uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
|
||||
version = "0.4.1"
|
||||
|
||||
[[ChainRulesCore]]
|
||||
deps = ["Compat", "LinearAlgebra", "SparseArrays"]
|
||||
git-tree-sha1 = "bdc0937269321858ab2a4f288486cb258b9a0af7"
|
||||
uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
|
||||
version = "1.3.0"
|
||||
|
||||
[[Compat]]
|
||||
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
|
||||
git-tree-sha1 = "727e463cfebd0c7b999bbf3e9e7e16f254b94193"
|
||||
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
|
||||
version = "3.34.0"
|
||||
|
||||
[[CompilerSupportLibraries_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
|
||||
|
||||
[[Dates]]
|
||||
deps = ["Printf"]
|
||||
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
|
||||
|
||||
[[DelimitedFiles]]
|
||||
deps = ["Mmap"]
|
||||
uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
|
||||
|
||||
[[Distributed]]
|
||||
deps = ["Random", "Serialization", "Sockets"]
|
||||
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
|
||||
|
||||
[[DocStringExtensions]]
|
||||
deps = ["LibGit2"]
|
||||
git-tree-sha1 = "a32185f5428d3986f47c2ab78b1f216d5e6cc96f"
|
||||
uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
|
||||
version = "0.8.5"
|
||||
|
||||
[[Downloads]]
|
||||
deps = ["ArgTools", "LibCURL", "NetworkOptions"]
|
||||
uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
|
||||
|
||||
[[ExprTools]]
|
||||
git-tree-sha1 = "b7e3d17636b348f005f11040025ae8c6f645fe92"
|
||||
uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
|
||||
version = "0.1.6"
|
||||
|
||||
[[GPUArrays]]
|
||||
deps = ["Adapt", "LinearAlgebra", "Printf", "Random", "Serialization", "Statistics"]
|
||||
git-tree-sha1 = "8fac1cf7d6ce0f2249c7acaf25d22e1e85c4a07f"
|
||||
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
|
||||
version = "8.0.2"
|
||||
|
||||
[[GPUCompiler]]
|
||||
deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"]
|
||||
git-tree-sha1 = "4ed2616d5e656c8716736b64da86755467f26cf5"
|
||||
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
|
||||
version = "0.12.9"
|
||||
|
||||
[[InteractiveUtils]]
|
||||
deps = ["Markdown"]
|
||||
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
|
||||
|
||||
[[IrrationalConstants]]
|
||||
git-tree-sha1 = "f76424439413893a832026ca355fe273e93bce94"
|
||||
uuid = "92d709cd-6900-40b7-9082-c6be49f344b6"
|
||||
version = "0.1.0"
|
||||
|
||||
[[JLLWrappers]]
|
||||
deps = ["Preferences"]
|
||||
git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e"
|
||||
uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
|
||||
version = "1.3.0"
|
||||
|
||||
[[LLVM]]
|
||||
deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"]
|
||||
git-tree-sha1 = "23a47d417a3cd9c2e73c854bac7dd4731c105ef7"
|
||||
uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
|
||||
version = "4.4.0"
|
||||
|
||||
[[LLVMExtra_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "9c360e5ce980b88bb31a7b086dbb19469008154b"
|
||||
uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab"
|
||||
version = "0.0.10+0"
|
||||
|
||||
[[LibCURL]]
|
||||
deps = ["LibCURL_jll", "MozillaCACerts_jll"]
|
||||
uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
|
||||
|
||||
[[LibCURL_jll]]
|
||||
deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
|
||||
uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
|
||||
|
||||
[[LibGit2]]
|
||||
deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
|
||||
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
|
||||
|
||||
[[LibSSH2_jll]]
|
||||
deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
|
||||
uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
|
||||
|
||||
[[Libdl]]
|
||||
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
|
||||
|
||||
[[LinearAlgebra]]
|
||||
deps = ["Libdl"]
|
||||
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
|
||||
|
||||
[[LogExpFunctions]]
|
||||
deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"]
|
||||
git-tree-sha1 = "3d682c07e6dd250ed082f883dc88aee7996bf2cc"
|
||||
uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
|
||||
version = "0.3.0"
|
||||
|
||||
[[Logging]]
|
||||
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
|
||||
|
||||
[[Markdown]]
|
||||
deps = ["Base64"]
|
||||
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
|
||||
|
||||
[[MbedTLS_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
|
||||
|
||||
[[Mmap]]
|
||||
uuid = "a63ad114-7e13-5084-954f-fe012c677804"
|
||||
|
||||
[[MozillaCACerts_jll]]
|
||||
uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
|
||||
|
||||
[[NEO_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "gmmlib_jll", "libigc_jll", "oneAPI_Level_Zero_Headers_jll"]
|
||||
git-tree-sha1 = "2bfc354b5684821dcc88f1e477cefd0dd03c60b5"
|
||||
uuid = "700fe977-ac61-5f37-bbc8-c6c4b2b6a9fd"
|
||||
version = "21.31.20514+0"
|
||||
|
||||
[[NetworkOptions]]
|
||||
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
|
||||
|
||||
[[OpenSpecFun_jll]]
|
||||
deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1"
|
||||
uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
|
||||
version = "0.5.5+0"
|
||||
|
||||
[[OrderedCollections]]
|
||||
git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c"
|
||||
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
|
||||
version = "1.4.1"
|
||||
|
||||
[[Parameters]]
|
||||
deps = ["OrderedCollections", "UnPack"]
|
||||
git-tree-sha1 = "2276ac65f1e236e0a6ea70baff3f62ad4c625345"
|
||||
uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
|
||||
version = "0.12.2"
|
||||
|
||||
[[Pkg]]
|
||||
deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
|
||||
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
|
||||
|
||||
[[Preferences]]
|
||||
deps = ["TOML"]
|
||||
git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a"
|
||||
uuid = "21216c6a-2e73-6563-6e65-726566657250"
|
||||
version = "1.2.2"
|
||||
|
||||
[[Printf]]
|
||||
deps = ["Unicode"]
|
||||
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
|
||||
|
||||
[[REPL]]
|
||||
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
|
||||
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
|
||||
|
||||
[[Random]]
|
||||
deps = ["Serialization"]
|
||||
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
|
||||
|
||||
[[SHA]]
|
||||
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
|
||||
|
||||
[[SPIRV_LLVM_Translator_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "8cca87d57f6ddf19373cc9791fddc741406c8fbf"
|
||||
uuid = "4a5d46fc-d8cf-5151-a261-86b458210efb"
|
||||
version = "11.0.0+2"
|
||||
|
||||
[[SPIRV_Tools_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "c0324b7e07bc4649f755bfe7e00f7c6ed6aa353f"
|
||||
uuid = "6ac6d60f-d740-5983-97d7-a4482c0689f4"
|
||||
version = "2021.2.0+0"
|
||||
|
||||
[[Serialization]]
|
||||
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
|
||||
|
||||
[[SharedArrays]]
|
||||
deps = ["Distributed", "Mmap", "Random", "Serialization"]
|
||||
uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
|
||||
|
||||
[[Sockets]]
|
||||
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
|
||||
|
||||
[[SparseArrays]]
|
||||
deps = ["LinearAlgebra", "Random"]
|
||||
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
|
||||
|
||||
[[SpecialFunctions]]
|
||||
deps = ["ChainRulesCore", "LogExpFunctions", "OpenSpecFun_jll"]
|
||||
git-tree-sha1 = "a322a9493e49c5f3a10b50df3aedaf1cdb3244b7"
|
||||
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
|
||||
version = "1.6.1"
|
||||
|
||||
[[Statistics]]
|
||||
deps = ["LinearAlgebra", "SparseArrays"]
|
||||
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
|
||||
|
||||
[[TOML]]
|
||||
deps = ["Dates"]
|
||||
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
|
||||
|
||||
[[Tar]]
|
||||
deps = ["ArgTools", "SHA"]
|
||||
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
|
||||
|
||||
[[Test]]
|
||||
deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
|
||||
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
||||
|
||||
[[TextWrap]]
|
||||
git-tree-sha1 = "9250ef9b01b66667380cf3275b3f7488d0e25faf"
|
||||
uuid = "b718987f-49a8-5099-9789-dcd902bef87d"
|
||||
version = "1.0.1"
|
||||
|
||||
[[TimerOutputs]]
|
||||
deps = ["ExprTools", "Printf"]
|
||||
git-tree-sha1 = "209a8326c4f955e2442c07b56029e88bb48299c7"
|
||||
uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
|
||||
version = "0.5.12"
|
||||
|
||||
[[UUIDs]]
|
||||
deps = ["Random", "SHA"]
|
||||
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
|
||||
|
||||
[[UnPack]]
|
||||
git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b"
|
||||
uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
|
||||
version = "1.0.2"
|
||||
|
||||
[[Unicode]]
|
||||
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
|
||||
|
||||
[[Zlib_jll]]
|
||||
deps = ["Libdl"]
|
||||
uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
|
||||
|
||||
[[gmmlib_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "0d5e5461d21b14853b4c332045c57d2601c403bd"
|
||||
uuid = "09858cae-167c-5acb-9302-fddc6874d481"
|
||||
version = "21.2.1+0"
|
||||
|
||||
[[libigc_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "b30a895e7ea52991a3f984ab0302c42858d766c0"
|
||||
uuid = "94295238-5935-5bd7-bb0f-b00942e9bdd5"
|
||||
version = "1.0.8173+0"
|
||||
|
||||
[[nghttp2_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
|
||||
|
||||
[[oneAPI]]
|
||||
deps = ["Adapt", "CEnum", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "LinearAlgebra", "NEO_jll", "Printf", "Random", "SPIRV_LLVM_Translator_jll", "SPIRV_Tools_jll", "SpecialFunctions", "oneAPI_Level_Zero_Headers_jll", "oneAPI_Level_Zero_Loader_jll"]
|
||||
git-tree-sha1 = "92e8eefdd4694597994590230ab329545804bdb3"
|
||||
uuid = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
|
||||
version = "0.2.0"
|
||||
|
||||
[[oneAPI_Level_Zero_Headers_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "e1d123ff9ada6c469a1eaf57e33a74c3cb26a5a4"
|
||||
uuid = "f4bc562b-d309-54f8-9efb-476e56f0410d"
|
||||
version = "1.2.13+0"
|
||||
|
||||
[[oneAPI_Level_Zero_Loader_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "oneAPI_Level_Zero_Headers_jll"]
|
||||
git-tree-sha1 = "50124857f7e87420655929a9c8ca86749826af11"
|
||||
uuid = "13eca655-d68d-5b81-8367-6d99d727ab01"
|
||||
version = "1.4.1+0"
|
||||
|
||||
[[p7zip_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
|
||||
7
JuliaStream.jl/oneAPI/Project.toml
Normal file
7
JuliaStream.jl/oneAPI/Project.toml
Normal file
@ -0,0 +1,7 @@
|
||||
[deps]
|
||||
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
|
||||
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
|
||||
oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"
|
||||
|
||||
[compat]
|
||||
julia = "1.6"
|
||||
167
JuliaStream.jl/src/AMDGPUStream.jl
Normal file
167
JuliaStream.jl/src/AMDGPUStream.jl
Normal file
@ -0,0 +1,167 @@
|
||||
# AMDGPU.jl doesn't support CPU agents, so this isn't a feature-complete ROCmStream, only AMD GPUs
|
||||
include("Stream.jl")
|
||||
using AMDGPU
|
||||
|
||||
# StreamData specialised so that its array type is ROCArray{T}.
const ROCData{T} = StreamData{T,ROCArray{T}}
# Workgroup size passed as `groupsize` to every kernel launch in this file.
const TBSize = 1024
# Number of workgroups, and therefore partial sums, used by the dot reduction.
const DotBlocks = 256
|
||||
|
||||
# Enumerate the GPU agents reported by AMDGPU.jl as
# (agent, repr(agent), "AMDGPU.jl") tuples, sorted by their repr.
# If querying the agents throws, an empty device list is returned instead.
function devices()::Vector{DeviceWithRepr}
  try
    # the iteration order of AMDGPU's agent list isn't stable, so sort by repr
    agents = sort(AMDGPU.get_agents(:gpu), by = repr)
    return [(agent, repr(agent), "AMDGPU.jl") for agent in agents]
  catch
    # best effort: probably an unsupported platform, so report no devices.
    # Use an empty vector of the declared element type rather than String[].
    return DeviceWithRepr[]
  end
end
|
||||
|
||||
# Make `device` the default AMDGPU agent and allocate three uninitialised
# ROCArray{T} buffers of length `arraysize`.
# Raises an error when `arraysize` is not a multiple of TBSize.
# Returns a tuple of the new ROCData{T} and `nothing`.
function make_stream(
  arraysize::Int,
  scalar::T,
  device::DeviceWithRepr,
  silent::Bool,
)::Tuple{ROCData{T},Nothing} where {T}

  arraysize % TBSize == 0 ||
    error("arraysize ($(arraysize)) must be divisible by $(TBSize)!")

  # XXX AMDGPU doesn't expose an API for setting the default like CUDA.device!()
  # but AMDGPU.get_default_agent returns DEFAULT_AGENT so we can do it by hand
  AMDGPU.DEFAULT_AGENT[] = device[1]
  agent = AMDGPU.get_default_agent()

  if !silent
    println("Using GPU HSA device: $(AMDGPU.get_name(agent)) ($(repr(agent)))")
    println("Kernel parameters : <<<$(arraysize),$(TBSize)>>>")
  end

  buf_a = ROCArray{T}(undef, arraysize)
  buf_b = ROCArray{T}(undef, arraysize)
  buf_c = ROCArray{T}(undef, arraysize)
  stream = ROCData{T}(buf_a, buf_b, buf_c, scalar, arraysize)
  return (stream, nothing)
end
|
||||
|
||||
# Fill data.a, data.b and data.c with the first, second and third element
# of `init` respectively. The second positional argument is ignored.
function init_arrays!(data::ROCData{T}, _, init::Tuple{T,T,T}) where {T}
  init_a, init_b, init_c = init
  AMDGPU.fill!(data.a, init_a)
  AMDGPU.fill!(data.b, init_b)
  return AMDGPU.fill!(data.c, init_c)
end
|
||||
|
||||
# Copy kernel: c[i] = a[i], launched with data.size work-items in groups of
# TBSize; blocks until the launch has completed.
function copy!(data::ROCData{T}, _) where {T}
  function copy_kernel(src::AbstractArray{T}, dst::AbstractArray{T})
    # only workgroupIdx starts at 1
    group_start = (workgroupIdx().x - 1) * workgroupDim().x
    idx = group_start + workitemIdx().x
    @inbounds dst[idx] = src[idx]
    return
  end
  launched = @roc groupsize = TBSize gridsize = data.size copy_kernel(data.a, data.c)
  return AMDGPU.wait(launched)
end
|
||||
|
||||
# Scale kernel: b[i] = scalar * c[i], launched with data.size work-items in
# groups of TBSize; blocks until the launch has completed.
function mul!(data::ROCData{T}, _) where {T}
  function mul_kernel(dst::AbstractArray{T}, src::AbstractArray{T}, factor::T)
    # only workgroupIdx starts at 1
    group_start = (workgroupIdx().x - 1) * workgroupDim().x
    idx = group_start + workitemIdx().x
    @inbounds dst[idx] = factor * src[idx]
    return
  end
  launched = @roc groupsize = TBSize gridsize = data.size mul_kernel(data.b, data.c, data.scalar)
  return AMDGPU.wait(launched)
end
|
||||
|
||||
# Add kernel: c[i] = a[i] + b[i], launched with data.size work-items in
# groups of TBSize; blocks until the launch has completed.
function add!(data::ROCData{T}, _) where {T}
  function add_kernel(lhs::AbstractArray{T}, rhs::AbstractArray{T}, out::AbstractArray{T})
    # only workgroupIdx starts at 1
    group_start = (workgroupIdx().x - 1) * workgroupDim().x
    idx = group_start + workitemIdx().x
    @inbounds out[idx] = lhs[idx] + rhs[idx]
    return
  end
  launched = @roc groupsize = TBSize gridsize = data.size add_kernel(data.a, data.b, data.c)
  return AMDGPU.wait(launched)
end
|
||||
|
||||
# Triad kernel: a[i] = b[i] + scalar * c[i], launched with data.size
# work-items in groups of TBSize; blocks until the launch has completed.
function triad!(data::ROCData{T}, _) where {T}
  function triad_kernel(
    out::AbstractArray{T},
    lhs::AbstractArray{T},
    rhs::AbstractArray{T},
    factor::T,
  )
    # only workgroupIdx starts at 1
    group_start = (workgroupIdx().x - 1) * workgroupDim().x
    idx = group_start + workitemIdx().x
    @inbounds out[idx] = lhs[idx] + (factor * rhs[idx])
    return
  end
  launched = @roc groupsize = TBSize gridsize = data.size triad_kernel(data.a, data.b, data.c, data.scalar)
  return AMDGPU.wait(launched)
end
|
||||
|
||||
# Nstream kernel: a[i] += b[i] + scalar * c[i], launched with data.size
# work-items in groups of TBSize; blocks until the launch has completed.
function nstream!(data::ROCData{T}, _) where {T}
  function nstream_kernel(
    acc::AbstractArray{T},
    lhs::AbstractArray{T},
    rhs::AbstractArray{T},
    factor::T,
  )
    # only workgroupIdx starts at 1
    group_start = (workgroupIdx().x - 1) * workgroupDim().x
    idx = group_start + workitemIdx().x
    @inbounds acc[idx] += lhs[idx] + factor * rhs[idx]
    return
  end
  launched = @roc groupsize = TBSize gridsize = data.size nstream_kernel(data.a, data.b, data.c, data.scalar)
  return AMDGPU.wait(launched)
end
|
||||
|
||||
# Dot product of data.a and data.b.
# The kernel is launched with DotBlocks workgroups of TBSize work-items.
# Each work-item accumulates its strided share of a[i] * b[i] into a
# per-workgroup scratch buffer, and each workgroup then tree-reduces that
# buffer into one entry of `partial_sum`.
# Returns sum(partial_sum), i.e. the DotBlocks partial results added together.
function dot(data::ROCData{T}, _) where {T}
  function dot_kernel(
    a::AbstractArray{T},
    b::AbstractArray{T},
    size::Int,
    partial::AbstractArray{T},
  )
    # scratch buffer from alloc_local with one slot per work-item of the group
    tb_sum = ROCDeviceArray((TBSize,), alloc_local(:reduce, T, TBSize))
    local_i = workitemIdx().x
    # zero(T) keeps the accumulator in the element type instead of relying on
    # an implicit conversion from a Float64 literal
    @inbounds tb_sum[local_i] = zero(T)

    # do dot first
    i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x # only workgroupIdx starts at 1
    while i <= size
      @inbounds tb_sum[local_i] += a[i] * b[i]
      i += TBSize * DotBlocks # XXX don't use (workgroupDim().x * gridDimWG().x) here
    end

    # then tree reduction: halve the active range each round, synchronising
    # the workgroup before every read of a neighbouring slot
    offset = workgroupDim().x ÷ 2
    while offset > 0
      sync_workgroup()
      if (local_i - 1) < offset
        @inbounds tb_sum[local_i] += tb_sum[local_i+offset]
      end
      offset ÷= 2
    end

    # the first work-item of each group publishes the group's result
    if local_i == 1
      @inbounds partial[workgroupIdx().x] = tb_sum[local_i]
    end

    return
  end

  partial_sum = ROCArray{T}(undef, DotBlocks)
  launched = @roc groupsize = TBSize gridsize = TBSize * DotBlocks dot_kernel(data.a, data.b, data.size, partial_sum)
  AMDGPU.wait(launched)
  return sum(partial_sum)
end
|
||||
|
||||
# Repackage the ROCm-backed stream data as a VectorData{T} with the same scalar and size.
function read_data(data::ROCData{T}, _)::VectorData{T} where {T}
  host = VectorData{T}(data.a, data.b, data.c, data.scalar, data.size)
  return host
end
|
||||
|
||||
main()
|
||||
152
JuliaStream.jl/src/CUDAStream.jl
Normal file
152
JuliaStream.jl/src/CUDAStream.jl
Normal file
@ -0,0 +1,152 @@
|
||||
include("Stream.jl")
|
||||
using CUDA
|
||||
|
||||
const CuData = StreamData{T,CuArray{T}} where {T}
|
||||
const TBSize = 1024::Int
|
||||
const DotBlocks = 256::Int
|
||||
|
||||
# List the CUDA devices as (device, description, implementation) tuples.
# Returns an empty list when CUDA.jl reports a non-functional setup.
function devices()::Vector{DeviceWithRepr}
  if !CUDA.functional(false)
    # the fallback carries the declared element type rather than String
    return DeviceWithRepr[]
  end
  return map(d -> (d, "$(CUDA.name(d)) ($(repr(d)))", "CUDA.jl"), CUDA.devices())
end
|
||||
|
||||
# Select the requested CUDA device and allocate the three uninitialised device arrays.
# Fails when arraysize is not a multiple of TBSize or when CUDA is not functional.
# Returns (data, nothing); this backend carries no extra context.
function make_stream(
  arraysize::Int,
  scalar::T,
  device::DeviceWithRepr,
  silent::Bool,
)::Tuple{CuData{T},Nothing} where {T}

  arraysize % TBSize == 0 || error("arraysize ($(arraysize)) must be divisible by $(TBSize)!")

  CUDA.device!(device[1])
  selected = CUDA.device()
  # show_reason is set to true here so it dumps CUDA info
  # for us regardless of whether it's functional
  CUDA.functional(true) || error("Non-functional CUDA configuration")

  if !silent
    println("Using CUDA device: $(CUDA.name(selected)) ($(repr(selected)))")
    println("Kernel parameters: <<<$(arraysize ÷ TBSize),$(TBSize)>>>")
  end

  a = CuArray{T}(undef, arraysize)
  b = CuArray{T}(undef, arraysize)
  c = CuArray{T}(undef, arraysize)
  return (CuData{T}(a, b, c, scalar, arraysize), nothing)
end
|
||||
|
||||
# Fill a, b and c with the three initial values, in that order.
function init_arrays!(data::CuData{T}, _, init::Tuple{T,T,T}) where {T}
  (a0, b0, c0) = init
  fill!(data.a, a0)
  fill!(data.b, b0)
  fill!(data.c, c0)
end
|
||||
|
||||
# Copy: c[i] = a[i], launched as data.size ÷ TBSize blocks of TBSize threads, then synchronised.
function copy!(data::CuData{T}, _) where {T}
  function kernel(src::AbstractArray{T}, dst::AbstractArray{T})
    gid = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    @inbounds dst[gid] = src[gid]
    return
  end
  @cuda blocks = data.size ÷ TBSize threads = TBSize kernel(data.a, data.c)
  CUDA.synchronize()
end
|
||||
|
||||
# Mul: b[i] = scalar * c[i], launched as data.size ÷ TBSize blocks of TBSize threads, then synchronised.
function mul!(data::CuData{T}, _) where {T}
  function kernel(dst::AbstractArray{T}, src::AbstractArray{T}, factor::T)
    gid = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    @inbounds dst[gid] = factor * src[gid]
    return
  end
  @cuda blocks = data.size ÷ TBSize threads = TBSize kernel(data.b, data.c, data.scalar)
  CUDA.synchronize()
end
|
||||
|
||||
# Add: c[i] = a[i] + b[i], launched as data.size ÷ TBSize blocks of TBSize threads, then synchronised.
function add!(data::CuData{T}, _) where {T}
  function kernel(x::AbstractArray{T}, y::AbstractArray{T}, dst::AbstractArray{T})
    gid = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    @inbounds dst[gid] = x[gid] + y[gid]
    return
  end
  @cuda blocks = data.size ÷ TBSize threads = TBSize kernel(data.a, data.b, data.c)
  CUDA.synchronize()
end
|
||||
|
||||
# Triad: a[i] = b[i] + scalar * c[i], launched as data.size ÷ TBSize blocks of TBSize threads, then synchronised.
function triad!(data::CuData{T}, _) where {T}
  function kernel(dst::AbstractArray{T}, x::AbstractArray{T}, y::AbstractArray{T}, factor::T)
    gid = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    @inbounds dst[gid] = x[gid] + (factor * y[gid])
    return
  end
  @cuda blocks = data.size ÷ TBSize threads = TBSize kernel(data.a, data.b, data.c, data.scalar)
  CUDA.synchronize()
end
|
||||
|
||||
# Nstream: a[i] += b[i] + scalar * c[i], launched as data.size ÷ TBSize blocks of TBSize threads, then synchronised.
function nstream!(data::CuData{T}, _) where {T}
  function kernel(acc::AbstractArray{T}, x::AbstractArray{T}, y::AbstractArray{T}, factor::T)
    gid = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    @inbounds acc[gid] += x[gid] + factor * y[gid]
    return
  end
  @cuda blocks = data.size ÷ TBSize threads = TBSize kernel(data.a, data.b, data.c, data.scalar)
  CUDA.synchronize()
end
|
||||
|
||||
# Dot product of data.a and data.b: each block reduces its products into one entry
# of a DotBlocks-long partial array, which is then summed.
function dot(data::CuData{T}, _) where {T}
  # direct port of the reduction in CUDAStream.cu
  function kernel(a::AbstractArray{T}, b::AbstractArray{T}, size::Int, partial::AbstractArray{T})
    scratch = @cuStaticSharedMem(T, TBSize)
    tid = threadIdx().x
    @inbounds scratch[tid] = 0.0

    # accumulate this thread's share of the products
    idx = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    while idx <= size
      @inbounds scratch[tid] += a[idx] * b[idx]
      idx += blockDim().x * gridDim().x
    end

    # tree reduction within the block
    half = blockDim().x ÷ 2
    while half > 0
      sync_threads()
      if (tid - 1) < half
        @inbounds scratch[tid] += scratch[tid+half]
      end
      half ÷= 2
    end

    # the first thread publishes the block's result
    if tid == 1
      @inbounds partial[blockIdx().x] = scratch[tid]
    end

    return
  end

  partial_sum = CuArray{T}(undef, DotBlocks)
  @cuda blocks = DotBlocks threads = TBSize kernel(data.a, data.b, data.size, partial_sum)
  return sum(partial_sum)
end
|
||||
|
||||
# Repackage the CUDA-backed stream data as a VectorData{T} with the same scalar and size.
function read_data(data::CuData{T}, _)::VectorData{T} where {T}
  host = VectorData{T}(data.a, data.b, data.c, data.scalar, data.size)
  return host
end
|
||||
|
||||
main()
|
||||
85
JuliaStream.jl/src/DistributedStream.jl
Normal file
85
JuliaStream.jl/src/DistributedStream.jl
Normal file
@ -0,0 +1,85 @@
|
||||
using Distributed
|
||||
|
||||
@everywhere using Pkg
|
||||
@everywhere Pkg.activate("."; io = devnull) # don't spam `Activating environment at...`
|
||||
@everywhere include("StreamData.jl")
|
||||
@everywhere include("Stream.jl")
|
||||
@everywhere using SharedArrays
|
||||
@everywhere const SharedArrayData = StreamData{T,SharedArray{T}} where {T}
|
||||
|
||||
# The single pseudo-device for this backend; the description shows the worker count.
function devices()::Vector{DeviceWithRepr}
  description = "CPU (localhost) $(nworkers())P"
  return [(undef, description, "Distributed.jl")]
end
|
||||
|
||||
# Allocate three SharedArrays of arraysize elements and wrap them in stream data.
# The device argument is ignored. Returns (data, nothing).
function make_stream(
  arraysize::Int,
  scalar::T,
  _::DeviceWithRepr,
  silent::Bool,
)::Tuple{SharedArrayData{T},Nothing} where {T}

  silent || println("Using max $(nworkers()) process(es) + 1 master")

  a = SharedArray{T}(arraysize)
  b = SharedArray{T}(arraysize)
  c = SharedArray{T}(arraysize)
  return (SharedArrayData{T}(a, b, c, scalar, arraysize), nothing)
end
|
||||
|
||||
# Set every element of a, b and c to the matching initial value using a
# distributed loop, and wait for it to finish.
function init_arrays!(data::SharedArrayData{T}, _, init::Tuple{T,T,T}) where {T}
  @sync @distributed for idx in 1:data.size
    @inbounds data.a[idx] = init[1]
    @inbounds data.b[idx] = init[2]
    @inbounds data.c[idx] = init[3]
  end
end
|
||||
|
||||
# Copy: c[i] = a[i] over a distributed loop; waits for completion.
function copy!(data::SharedArrayData{T}, _) where {T}
  @sync @distributed for idx in 1:data.size
    @inbounds data.c[idx] = data.a[idx]
  end
end
|
||||
|
||||
# Mul: b[i] = scalar * c[i] over a distributed loop; waits for completion.
function mul!(data::SharedArrayData{T}, _) where {T}
  @sync @distributed for idx in 1:data.size
    @inbounds data.b[idx] = data.scalar * data.c[idx]
  end
end
|
||||
|
||||
# Add: c[i] = a[i] + b[i] over a distributed loop; waits for completion.
function add!(data::SharedArrayData{T}, _) where {T}
  @sync @distributed for idx in 1:data.size
    @inbounds data.c[idx] = data.a[idx] + data.b[idx]
  end
end
|
||||
|
||||
# Triad: a[i] = b[i] + scalar * c[i] over a distributed loop; waits for completion.
function triad!(data::SharedArrayData{T}, _) where {T}
  @sync @distributed for idx in 1:data.size
    @inbounds data.a[idx] = data.b[idx] + (data.scalar * data.c[idx])
  end
end
|
||||
|
||||
# Nstream: a[i] += b[i] + scalar * c[i] over a distributed loop; waits for completion.
function nstream!(data::SharedArrayData{T}, _) where {T}
  @sync @distributed for idx in 1:data.size
    @inbounds data.a[idx] += data.b[idx] + data.scalar * data.c[idx]
  end
end
|
||||
|
||||
# Dot product of a and b, computed as a (+)-reduction over a distributed loop.
function dot(data::SharedArrayData{T}, _) where {T}
  total = @distributed (+) for idx in 1:data.size
    @inbounds data.a[idx] * data.b[idx]
  end
  return total
end
|
||||
|
||||
# Repackage the SharedArray-backed stream data as a VectorData{T} with the same scalar and size.
function read_data(data::SharedArrayData{T}, _)::VectorData{T} where {T}
  host = VectorData{T}(data.a, data.b, data.c, data.scalar, data.size)
  return host
end
|
||||
|
||||
main()
|
||||
4
JuliaStream.jl/src/JuliaStream.jl
Normal file
4
JuliaStream.jl/src/JuliaStream.jl
Normal file
@ -0,0 +1,4 @@
|
||||
module JuliaStream
|
||||
end
|
||||
|
||||
println("Please run benchmarks directly via `julia --project src/<IMPL>Stream.jl`")
|
||||
255
JuliaStream.jl/src/KernelAbstractionsStream.jl
Normal file
255
JuliaStream.jl/src/KernelAbstractionsStream.jl
Normal file
@ -0,0 +1,255 @@
|
||||
using ROCKernels, CUDAKernels, KernelAbstractions, CUDA, AMDGPU
|
||||
include("Stream.jl")
|
||||
|
||||
const CuData = StreamData{T,CUDA.CuArray{T}} where {T}
|
||||
const ROCData = StreamData{T,AMDGPU.ROCArray{T}} where {T}
|
||||
|
||||
const TBSize = 1024::Int
|
||||
const DotBlocks = 256::Int
|
||||
|
||||
@enum Backend cuda rocm cpu
|
||||
|
||||
# Per-run context passed to every kernel wrapper in this file.
struct Context
  # which of the cuda / rocm / cpu code paths was selected
  backend::Backend
  # device object handed to the kernel constructors at launch
  device::Device
end
|
||||
|
||||
# List ROCm GPU agents as (agent, description, rocm) tuples, or an empty list if the query throws.
function list_rocm_devices()::Vector{DeviceWithRepr}
  try
    # the agent iteration order isn't stable, so order by repr
    agents = AMDGPU.get_agents(:gpu)
    ordered = sort(agents, by = repr)
    return map(agent -> (agent, repr(agent), rocm), ordered)
  catch
    # probably unsupported
    return []
  end
end
|
||||
|
||||
# List CUDA devices as (device, description, cuda) tuples.
# Returns an empty list when CUDA.jl reports a non-functional setup.
function list_cuda_devices()::Vector{DeviceWithRepr}
  if !CUDA.functional(false)
    # the fallback carries the declared element type rather than String
    return DeviceWithRepr[]
  end
  return map(d -> (d, "$(CUDA.name(d)) ($(repr(d)))", cuda), CUDA.devices())
end
|
||||
|
||||
# All selectable devices: the CPU entry first, then CUDA devices, then ROCm agents.
function devices()::Vector{DeviceWithRepr}
  cuda_entries = list_cuda_devices()
  rocm_entries = list_rocm_devices()
  cpu_label = "$(Sys.cpu_info()[1].model) ($(Threads.nthreads())T)"
  cpu_entries = [(undef, cpu_label, cpu)]
  return vcat(cpu_entries, cuda_entries, rocm_entries)
end
|
||||
|
||||
# Allocate the three stream arrays on the backend named by the device tuple.
# Returns (data, Context) for use by the kernel wrappers.
#
# arraysize must be a multiple of TBSize. `device` is a
# (handle, description, backend) tuple as produced by devices().
# Errors when the requested CUDA/HSA device cannot be selected or the backend
# tag is unknown.
function make_stream(
  arraysize::Int,
  scalar::T,
  device::DeviceWithRepr,
  silent::Bool,
) where {T}

  if arraysize % TBSize != 0
    error("arraysize ($(arraysize)) must be divisible by $(TBSize)!")
  end

  (selected, _, backend) = device
  if backend == cpu
    if !silent
      println("Using CPU with max $(Threads.nthreads()) threads")
    end
    data = VectorData{T}(
      Vector{T}(undef, arraysize),
      Vector{T}(undef, arraysize),
      Vector{T}(undef, arraysize),
      scalar,
      arraysize,
    )
    backenddevice = CPU()
  elseif backend == cuda
    CUDA.device!(selected)
    if CUDA.device() != selected
      error("Cannot select CUDA device, expecting $selected, but got $(CUDA.device())")
    end
    # show_reason = true so CUDA.jl reports why the setup is unusable
    if !CUDA.functional(true)
      error("Non-functional CUDA configuration")
    end
    if !silent
      println("Using CUDA device: $(CUDA.name(selected)) ($(repr(selected)))")
    end
    data = CuData{T}(
      CuArray{T}(undef, arraysize),
      CuArray{T}(undef, arraysize),
      CuArray{T}(undef, arraysize),
      scalar,
      arraysize,
    )
    backenddevice = CUDADevice()
  elseif backend == rocm
    AMDGPU.DEFAULT_AGENT[] = selected
    if AMDGPU.get_default_agent() != selected
      error(
        "Cannot select HSA device, expecting $selected, but got $(AMDGPU.get_default_agent())",
      )
    end
    if !silent
      println("Using GPU HSA device: $(AMDGPU.get_name(selected)) ($(repr(selected)))")
    end
    data = ROCData{T}(
      ROCArray{T}(undef, arraysize),
      ROCArray{T}(undef, arraysize),
      ROCArray{T}(undef, arraysize),
      scalar,
      arraysize,
    )
    backenddevice = ROCDevice()
  else
    error("unsupported backend $(backend)")
  end

  # No partial-sum buffer is allocated here: dot allocates its own on every call,
  # so the buffer this function used to create was never read.
  if !silent
    println("Kernel parameters : <<<$(data.size),$(TBSize)>>>")
  end
  return (data, Context(backend, backenddevice))
end
|
||||
|
||||
# Fill a, b and c with the three initial values using the fill routine of the
# backend recorded in `context` (threaded loop on CPU, fill! on CUDA/ROCm).
# Errors on an unknown backend tag.
function init_arrays!(
  data::StreamData{T,C},
  context::Context,
  init::Tuple{T,T,T},
) where {T,C}
  if context.backend == cpu
    Threads.@threads for i = 1:data.size
      @inbounds data.a[i] = init[1]
      @inbounds data.b[i] = init[2]
      @inbounds data.c[i] = init[3]
    end
  elseif context.backend == cuda
    CUDA.fill!(data.a, init[1])
    CUDA.fill!(data.b, init[2])
    CUDA.fill!(data.c, init[3])
  elseif context.backend == rocm
    AMDGPU.fill!(data.a, init[1])
    AMDGPU.fill!(data.b, init[2])
    AMDGPU.fill!(data.c, init[3])
  else
    # report the tag stored in the context; there is no `backend` variable in this scope
    error("unsupported backend $(context.backend)")
  end
end
|
||||
|
||||
# Copy: c[i] = a[i] over an ndrange of data.size with workgroup size TBSize; waits for the event.
function copy!(data::StreamData{T,C}, context::Context) where {T,C}
  @kernel function kernel(@Const(a::AbstractArray{T}), c)
    gid = @index(Global)
    @inbounds c[gid] = a[gid]
  end
  launch = kernel(context.device, TBSize)
  wait(launch(data.a, data.c, ndrange = data.size))
end
|
||||
|
||||
# Mul: b[i] = scalar * c[i] over an ndrange of data.size with workgroup size TBSize; waits for the event.
function mul!(data::StreamData{T,C}, context::Context) where {T,C}
  @kernel function kernel(b::AbstractArray{T}, @Const(c::AbstractArray{T}), scalar::T)
    gid = @index(Global)
    @inbounds b[gid] = scalar * c[gid]
  end
  launch = kernel(context.device, TBSize)
  wait(launch(data.b, data.c, data.scalar, ndrange = data.size))
end
|
||||
|
||||
# Add: c[i] = a[i] + b[i] over an ndrange of data.size with workgroup size TBSize; waits for the event.
function add!(data::StreamData{T,C}, context::Context) where {T,C}
  @kernel function kernel(@Const(a::AbstractArray{T}), @Const(b::AbstractArray{T}), c)
    gid = @index(Global)
    @inbounds c[gid] = a[gid] + b[gid]
  end
  launch = kernel(context.device, TBSize)
  wait(launch(data.a, data.b, data.c, ndrange = data.size))
end
|
||||
|
||||
# Triad: a[i] = b[i] + scalar * c[i] over an ndrange of data.size with workgroup size TBSize; waits for the event.
function triad!(data::StreamData{T,C}, context::Context) where {T,C}
  @kernel function kernel(a::AbstractArray{T}, @Const(b::AbstractArray{T}), @Const(c), scalar::T)
    gid = @index(Global)
    @inbounds a[gid] = b[gid] + (scalar * c[gid])
  end
  launch = kernel(context.device, TBSize)
  wait(launch(data.a, data.b, data.c, data.scalar, ndrange = data.size))
end
|
||||
|
||||
# Nstream: a[i] += b[i] + scalar * c[i] over an ndrange of data.size with workgroup size TBSize; waits for the event.
function nstream!(data::StreamData{T,C}, context::Context) where {T,C}
  @kernel function kernel(a::AbstractArray{T}, @Const(b::AbstractArray{T}), @Const(c), scalar::T)
    gid = @index(Global)
    @inbounds a[gid] += b[gid] + scalar * c[gid]
  end
  launch = kernel(context.device, TBSize)
  wait(launch(data.a, data.b, data.c, data.scalar, ndrange = data.size))
end
|
||||
|
||||
# Dot product of data.a and data.b: each workgroup reduces its products into one
# entry of a DotBlocks-long partial array, which is then summed.
# Errors on an unknown backend tag.
function dot(data::StreamData{T,C}, context::Context) where {T,C}
  @kernel function kernel(@Const(a::AbstractArray{T}), @Const(b::AbstractArray{T}), size::Int, partial::AbstractArray{T})
    local_i = @index(Local)
    group_i = @index(Group)
    tb_sum = @localmem T TBSize
    @inbounds tb_sum[local_i] = 0.0

    # do dot first
    i = @index(Global)
    while i <= size
      @inbounds tb_sum[local_i] += a[i] * b[i]
      i += TBSize * DotBlocks
    end

    # then tree reduction
    # FIXME this does not compile when targeting CPUs:
    # see https://github.com/JuliaGPU/KernelAbstractions.jl/issues/262
    offset = @private Int64 (1,)
    @inbounds begin
      offset[1] = @groupsize()[1] ÷ 2
      while offset[1] > 0
        @synchronize
        if (local_i - 1) < offset[1]
          tb_sum[local_i] += tb_sum[local_i+offset[1]]
        end
        offset[1] ÷= 2
      end
    end

    # the first work-item publishes the workgroup's result
    if (local_i == 1)
      @inbounds partial[group_i] = tb_sum[local_i]
    end
  end

  # the partial-result buffer must live on the same backend as the inputs
  if context.backend == cpu
    partial_sum = Vector{T}(undef, DotBlocks)
  elseif context.backend == cuda
    partial_sum = CuArray{T}(undef, DotBlocks)
  elseif context.backend == rocm
    partial_sum = ROCArray{T}(undef, DotBlocks)
  else
    # report the tag stored in the context; there is no `backend` variable in this scope
    error("unsupported backend $(context.backend)")
  end

  wait(
    kernel(context.device, TBSize)(
      data.a,
      data.b,
      data.size,
      partial_sum,
      ndrange = TBSize * DotBlocks,
    ),
  )

  return sum(partial_sum)
end
|
||||
|
||||
# Repackage the stream data as a VectorData{T} with the same scalar and size.
function read_data(data::StreamData{T,C}, _::Context)::VectorData{T} where {T,C}
  host = VectorData{T}(data.a, data.b, data.c, data.scalar, data.size)
  return host
end
|
||||
|
||||
main()
|
||||
75
JuliaStream.jl/src/PlainStream.jl
Normal file
75
JuliaStream.jl/src/PlainStream.jl
Normal file
@ -0,0 +1,75 @@
|
||||
include("Stream.jl")
|
||||
|
||||
# The single pseudo-device for the plain (serial loop) implementation.
# The third tuple element is the implementation label shown by `--list`.
function devices()::Vector{DeviceWithRepr}
  return [(undef, "CPU", "Plain")]
end
|
||||
|
||||
# Allocate three uninitialised Vector{T} of arraysize elements and wrap them in stream data.
# The device and silent arguments are not used. Returns (data, nothing).
function make_stream(
  arraysize::Int,
  scalar::T,
  _::DeviceWithRepr,
  silent::Bool,
)::Tuple{VectorData{T},Nothing} where {T}
  a = Vector{T}(undef, arraysize)
  b = Vector{T}(undef, arraysize)
  c = Vector{T}(undef, arraysize)
  return (VectorData{T}(a, b, c, scalar, arraysize), nothing)
end
|
||||
|
||||
# Set every element of a, b and c to the matching initial value.
function init_arrays!(data::VectorData{T}, _, init::Tuple{T,T,T}) where {T}
  (a0, b0, c0) = init
  for idx in 1:data.size
    @inbounds data.a[idx] = a0
    @inbounds data.b[idx] = b0
    @inbounds data.c[idx] = c0
  end
end
|
||||
|
||||
# Copy: c[i] = a[i] for i in 1:data.size.
function copy!(data::VectorData{T}, _) where {T}
  src, dst = data.a, data.c
  for idx in 1:data.size
    @inbounds dst[idx] = src[idx]
  end
end
|
||||
|
||||
# Mul: b[i] = scalar * c[i] for i in 1:data.size.
function mul!(data::VectorData{T}, _) where {T}
  dst, src = data.b, data.c
  factor = data.scalar
  for idx in 1:data.size
    @inbounds dst[idx] = factor * src[idx]
  end
end
|
||||
|
||||
# Add: c[i] = a[i] + b[i] for i in 1:data.size.
function add!(data::VectorData{T}, _) where {T}
  x, y, dst = data.a, data.b, data.c
  for idx in 1:data.size
    @inbounds dst[idx] = x[idx] + y[idx]
  end
end
|
||||
|
||||
# Triad: a[i] = b[i] + scalar * c[i] for i in 1:data.size.
function triad!(data::VectorData{T}, _) where {T}
  dst, x, y = data.a, data.b, data.c
  factor = data.scalar
  for idx in 1:data.size
    @inbounds dst[idx] = x[idx] + (factor * y[idx])
  end
end
|
||||
|
||||
# Nstream: a[i] += b[i] + scalar * c[i] for i in 1:data.size.
function nstream!(data::VectorData{T}, _) where {T}
  acc, x, y = data.a, data.b, data.c
  factor = data.scalar
  for idx in 1:data.size
    @inbounds acc[idx] += x[idx] + factor * y[idx]
  end
end
|
||||
|
||||
# Dot product of a and b, accumulated left to right starting from zero(T).
function dot(data::VectorData{T}, _) where {T}
  x, y = data.a, data.b
  acc = zero(T)
  for idx in 1:data.size
    @inbounds acc += x[idx] * y[idx]
  end
  return acc
end
|
||||
|
||||
# The data is already a VectorData, so it is handed back unchanged.
function read_data(data::VectorData{T}, _)::VectorData{T} where {T}
  data
end
|
||||
|
||||
main()
|
||||
300
JuliaStream.jl/src/Stream.jl
Normal file
300
JuliaStream.jl/src/Stream.jl
Normal file
@ -0,0 +1,300 @@
|
||||
using ArgParse
|
||||
using Parameters
|
||||
using Printf
|
||||
using Base: Float64, Int
|
||||
|
||||
include("StreamData.jl")
|
||||
|
||||
const VectorData = StreamData{T,Vector{T}} where {T}
|
||||
|
||||
const DeviceWithRepr = Tuple{Any,String,Any}
|
||||
|
||||
# Per-iteration elapsed times for each of the five kernels.
struct Timings
  copy::Vector{Float64}
  mul::Vector{Float64}
  add::Vector{Float64}
  triad::Vector{Float64}
  dot::Vector{Float64}
  # every series starts as n zeros
  function Timings(n)
    return new(zeros(n), zeros(n), zeros(n), zeros(n), zeros(n))
  end
end
|
||||
|
||||
@enum Benchmark All Triad Nstream
|
||||
|
||||
# Run copy, mul, add, triad and dot `times` times each, timing every call.
# Returns the timings together with the value of the last dot call.
function run_all!(data::StreamData{T,C}, context, times::Int)::Tuple{Timings,T} where {T,C}
  recorded = Timings(times)
  last_sum::T = 0
  for iter in 1:times
    @inbounds recorded.copy[iter] = @elapsed copy!(data, context)
    @inbounds recorded.mul[iter] = @elapsed mul!(data, context)
    @inbounds recorded.add[iter] = @elapsed add!(data, context)
    @inbounds recorded.triad[iter] = @elapsed triad!(data, context)
    @inbounds recorded.dot[iter] = @elapsed last_sum = dot(data, context)
  end
  return (recorded, last_sum)
end
|
||||
|
||||
# Run triad! `times` times and return the total elapsed seconds for the whole loop.
function run_triad!(data::StreamData{T,C}, context, times::Int)::Float64 where {T,C}
  total = @elapsed for _ in 1:times
    triad!(data, context)
  end
  return total
end
|
||||
|
||||
# Run nstream! `times` times and return the elapsed seconds of each call.
function run_nstream!(
  data::StreamData{T,C},
  context,
  times::Int,
)::Vector{Float64} where {T,C}
  recorded::Vector{Float64} = zeros(times)
  for iter in 1:times
    @inbounds recorded[iter] = @elapsed nstream!(data, context)
  end
  return recorded
end
|
||||
|
||||
# Validate the arrays (and optionally the dot result) against values recomputed
# on scalars.
#
# The expected a/b/c values are obtained by replaying the selected benchmark
# `times` times starting from `init`. Each array passes when its mean absolute
# deviation from the expected value is at most 100 * eps(T). When `dot` is not
# `nothing` it passes when its relative error against a * b * size is at most 1e-8.
# Prints a message for every failed check and returns true only if all pass.
function check_solutions(
  data::StreamData{T,C},
  times::Int,
  init::Tuple{T,T,T},
  benchmark::Benchmark,
  dot::Union{T,Nothing},
) where {T,C}
  (gold_a, gold_b, gold_c) = init
  for _ = 1:times
    if benchmark == All
      gold_c = gold_a
      gold_b = data.scalar * gold_c
      gold_c = gold_a + gold_b
      gold_a = gold_b + data.scalar * gold_c
    elseif benchmark == Triad
      gold_a = gold_b + data.scalar * gold_c
    elseif benchmark == Nstream
      gold_a += gold_b + data.scalar * gold_c
    else
      error("Unknown benchmark $(benchmark)")
    end
  end

  tolerance = eps(T) * 100

  # NOTE: no local in this function may be called `error`; assigning to that name
  # anywhere in the body would turn the `error(...)` call above into a read of an
  # unassigned local.
  function validate_xs(name::String, xs::AbstractArray{T}, from::T)
    avg_err = (map(x -> abs(x - from), xs) |> sum) / length(xs)
    xs_failed = avg_err > tolerance
    if xs_failed
      println("Validation failed on $name. Average error $avg_err")
    end
    !xs_failed
  end

  a_valid = validate_xs("a", data.a, gold_a)
  b_valid = validate_xs("b", data.b, gold_b)
  c_valid = validate_xs("c", data.c, gold_c)

  dot_valid = true
  if dot !== nothing
    gold_sum = gold_a * gold_b * data.size
    dot_err = abs((dot - gold_sum) / gold_sum)
    dot_failed = dot_err > 1.0e-8
    if dot_failed
      println(
        "Validation failed on sum. Error $dot_err \nSum was $dot but should be $gold_sum",
      )
    end
    dot_valid = !dot_failed
  end

  a_valid && b_valid && c_valid && dot_valid
end
|
||||
|
||||
# Command-line configuration; each field matches an option registered in parse_options
# and the values here are the defaults.
@with_kw mutable struct Config
  list::Bool = false          # --list
  device::Int = 1             # --device (1-indexed)
  numtimes::Int = 100         # --numtimes
  arraysize::Int = 33554432   # --arraysize
  float::Bool = false         # --float
  triad_only::Bool = false    # --triad_only
  nstream_only::Bool = false  # --nstream_only
  csv::Bool = false           # --csv
  mibibytes::Bool = false     # --mibibytes
end
|
||||
|
||||
# Parse the command line and write every parsed option onto `given` in place.
# Options with an explicit default fall back to the value already held by `given`.
function parse_options(given::Config)
  settings = ArgParseSettings()
  @add_arg_table settings begin
    "--list"
    help = "List available devices"
    action = :store_true
    "--device", "-d"
    help = "Select device at DEVICE, NOTE: Julia is 1-indexed"
    arg_type = Int
    default = given.device
    "--numtimes", "-n"
    help = "Run the test NUMTIMES times (NUM >= 2)"
    arg_type = Int
    default = given.numtimes
    "--arraysize", "-s"
    help = "Use ARRAYSIZE elements in the array"
    arg_type = Int
    default = given.arraysize
    "--float"
    help = "Use floats (rather than doubles)"
    action = :store_true
    "--triad_only"
    help = "Only run triad"
    action = :store_true
    "--nstream_only"
    help = "Only run nstream"
    action = :store_true
    "--csv"
    help = "Output as csv table"
    action = :store_true
    "--mibibytes"
    help = "Use MiB=2^20 for bandwidth calculation (default MB=10^6)"
    action = :store_true
  end
  parsed = parse_args(settings)
  # surely there's a better way than doing this:
  for (key, value) in parsed
    setproperty!(given, Symbol(key), value)
  end
end
|
||||
|
||||
const DefaultInit = (0.1, 0.2, 0.0)
|
||||
const DefaultScalar = 0.4
|
||||
const Version = "3.4.0"
|
||||
|
||||
# Benchmark driver: parse options, pick a device, run the selected benchmark(s),
# validate the results and print a report. Exits with status 1 when validation
# fails and with status 0 after `--list`.
function main()

  config::Config = Config()
  parse_options(config)

  if config.list
    for (i, (_, desc, impl)) in enumerate(devices())
      println("[$i] ($impl) $desc")
    end
    exit(0)
  end

  ds = devices()
  # TODO implement substring device match
  if config.device < 1 || config.device > length(ds)
    error(
      "Device $(config.device) out of range (1..$(length(ds))), NOTE: Julia is 1-indexed",
    )
  else
    device = ds[config.device]
  end

  type = config.float ? Float32 : Float64

  if config.nstream_only && !config.triad_only
    benchmark = Nstream
  elseif !config.nstream_only && config.triad_only
    benchmark = Triad
  elseif !config.nstream_only && !config.triad_only
    benchmark = All
  elseif config.nstream_only && config.triad_only
    error("Both triad and nstream are enabled, pick one or omit both to run all benchmarks")
  else
    error("Invalid config: $(repr(config))")
  end

  array_bytes = config.arraysize * sizeof(type)
  total_bytes = array_bytes * 3
  (mega_scale, mega_suffix, giga_scale, giga_suffix) =
    !config.mibibytes ? (1.0e-6, "MB", 1.0e-9, "GB") : (2^-20, "MiB", 2^-30, "GiB")

  if !config.csv
    println("BabelStream\nVersion: $Version\nImplementation: Julia; $(PROGRAM_FILE)")
    println("Running kernels $(config.numtimes) times")
    if benchmark == Triad
      println("Number of elements: $(config.arraysize)")
    end
    println("Precision: $(config.float ? "float" : "double")")
    r1 = n -> round(n; digits = 1)
    println(
      "Array size: $(r1(mega_scale * array_bytes)) $mega_suffix(=$(r1(giga_scale * array_bytes)) $giga_suffix)",
    )
    println(
      "Total size: $(r1(mega_scale * total_bytes)) $mega_suffix(=$(r1(giga_scale * total_bytes)) $giga_suffix)",
    )
  end

  # Build one report row (a list of column-name/value pairs) from a kernel's timings.
  function mk_row(xs::Vector{Float64}, name::String, total_bytes::Int)
    # Skip the first (warm-up) iteration when there is more than one sample.
    # Base.rest(xs) without a state returns the whole vector, so it never skipped anything.
    tail = length(xs) > 1 ? view(xs, 2:length(xs)) : xs
    tmin = minimum(tail)
    tmax = maximum(tail)
    tavg = sum(tail) / length(tail)
    mbps = mega_scale * total_bytes / tmin
    if config.csv
      return Tuple{String,Any}[
        ("function", name),
        ("num_times", config.numtimes),
        ("n_elements", config.arraysize),
        ("sizeof", total_bytes),
        ("max_m$( config.mibibytes ? "i" : "")bytes_per_sec", mbps),
        ("min_runtime", tmin),
        ("max_runtime", tmax),
        ("avg_runtime", tavg),
      ]
    else
      return Tuple{String,Any}[
        ("Function", name),
        ("M$(config.mibibytes ? "i" : "")Bytes/sec", round(mbps; digits = 3)),
        ("Min (sec)", round(tmin; digits = 5)),
        ("Max", round(tmax; digits = 5)),
        ("Average", round(tavg; digits = 5)),
      ]
    end
  end

  # Print a header taken from the first row's column names, then one line per row.
  function tabulate(rows::Vector{Tuple{String,Any}}...)
    header = Base.first(rows)
    padding = config.csv ? 0 : 12
    sep = config.csv ? "," : ""
    println(join(map(x -> rpad(x[1], padding), header), sep))
    for row in rows
      println(join(map(x -> rpad(x[2], padding), row), sep))
    end
  end

  init::Tuple{type,type,type} = DefaultInit
  scalar::type = DefaultScalar

  GC.enable(false)

  # csv mode doubles as the "silent" flag so only the table is printed
  (data, context) = make_stream(config.arraysize, scalar, device, config.csv)
  init_arrays!(data, context, init)
  if benchmark == All
    # named dot_sum so the local does not shadow Base.sum inside mk_row
    (timings, dot_sum) = run_all!(data, context, config.numtimes)
    valid = check_solutions(read_data(data, context), config.numtimes, init, benchmark, dot_sum)
    tabulate(
      mk_row(timings.copy, "Copy", 2 * array_bytes),
      mk_row(timings.mul, "Mul", 2 * array_bytes),
      mk_row(timings.add, "Add", 3 * array_bytes),
      mk_row(timings.triad, "Triad", 3 * array_bytes),
      mk_row(timings.dot, "Dot", 2 * array_bytes),
    )
  elseif benchmark == Nstream
    timings = run_nstream!(data, context, config.numtimes)
    valid =
      check_solutions(read_data(data, context), config.numtimes, init, benchmark, nothing)
    tabulate(mk_row(timings, "Nstream", 4 * array_bytes))
  elseif benchmark == Triad
    elapsed = run_triad!(data, context, config.numtimes)
    valid =
      check_solutions(read_data(data, context), config.numtimes, init, benchmark, nothing)
    total_bytes = 3 * array_bytes * config.numtimes
    # the figure is labelled with giga_suffix, so it must be scaled by giga_scale
    bandwidth = giga_scale * (total_bytes / elapsed)
    println("Runtime (seconds): $(round(elapsed; digits=5))")
    println("Bandwidth ($giga_suffix/s): $(round(bandwidth; digits=3)) ")
  else
    error("Bad benchmark $(benchmark)")
  end

  GC.enable(true)

  if !valid
    exit(1)
  end

end
|
||||
7
JuliaStream.jl/src/StreamData.jl
Normal file
7
JuliaStream.jl/src/StreamData.jl
Normal file
@ -0,0 +1,7 @@
|
||||
# The three benchmark arrays (all of the same container type C with element type T),
# the scalar used by the mul/triad/nstream kernels, and the element count.
struct StreamData{T,C<:AbstractArray{T}}
  a::C
  b::C
  c::C
  scalar::T
  size::Int
end
|
||||
112
JuliaStream.jl/src/ThreadedStream.jl
Normal file
112
JuliaStream.jl/src/ThreadedStream.jl
Normal file
@ -0,0 +1,112 @@
|
||||
include("Stream.jl")
|
||||
|
||||
# The single pseudo-device for this backend; the description shows the CPU model and thread count.
function devices()::Vector{DeviceWithRepr}
  description = "$(Sys.cpu_info()[1].model) ($(Threads.nthreads())T)"
  return [(undef, description, "Threaded")]
end
|
||||
|
||||
# Allocate three uninitialised Vector{T} of arraysize elements and wrap them in stream data.
# The device argument is ignored. Returns (data, nothing).
function make_stream(
  arraysize::Int,
  scalar::T,
  _::DeviceWithRepr,
  silent::Bool,
)::Tuple{VectorData{T},Nothing} where {T}
  silent || println("Using max $(Threads.nthreads()) threads")

  a = Vector{T}(undef, arraysize)
  b = Vector{T}(undef, arraysize)
  c = Vector{T}(undef, arraysize)
  return (VectorData{T}(a, b, c, scalar, arraysize), nothing)
end
|
||||
|
||||
# Set every element of a, b and c to the matching initial value using a threaded loop.
function init_arrays!(data::VectorData{T}, _, init::Tuple{T,T,T}) where {T}
  (a0, b0, c0) = init
  Threads.@threads for idx in 1:data.size
    @inbounds data.a[idx] = a0
    @inbounds data.b[idx] = b0
    @inbounds data.c[idx] = c0
  end
end
|
||||
|
||||
# Copy: c[i] = a[i] over a threaded loop.
function copy!(data::VectorData{T}, _) where {T}
  src, dst = data.a, data.c
  Threads.@threads for idx in 1:data.size
    @inbounds dst[idx] = src[idx]
  end
end
|
||||
|
||||
# Mul: b[i] = scalar * c[i] over a threaded loop.
function mul!(data::VectorData{T}, _) where {T}
  dst, src = data.b, data.c
  factor = data.scalar
  Threads.@threads for idx in 1:data.size
    @inbounds dst[idx] = factor * src[idx]
  end
end
|
||||
|
||||
# Add: c[i] = a[i] + b[i] over a threaded loop.
function add!(data::VectorData{T}, _) where {T}
  x, y, dst = data.a, data.b, data.c
  Threads.@threads for idx in 1:data.size
    @inbounds dst[idx] = x[idx] + y[idx]
  end
end
|
||||
|
||||
# Triad: a[i] = b[i] + scalar * c[i] over a threaded loop.
function triad!(data::VectorData{T}, _) where {T}
  dst, x, y = data.a, data.b, data.c
  factor = data.scalar
  Threads.@threads for idx in 1:data.size
    @inbounds dst[idx] = x[idx] + (factor * y[idx])
  end
end
|
||||
|
||||
# Nstream: a[i] += b[i] + scalar * c[i] over a threaded loop.
function nstream!(data::VectorData{T}, _) where {T}
  acc, x, y = data.a, data.b, data.c
  factor = data.scalar
  Threads.@threads for idx in 1:data.size
    @inbounds acc[idx] += x[idx] + factor * y[idx]
  end
end
|
||||
|
||||
# Threads.@threads/Threads.@spawn doesn't support OpenMP's firstprivate, etc
|
||||
# Split 1:range into n contiguous chunks and run f(group, first, last) for each
# chunk in its own task, waiting for all of them.
#
# The first `range % n` chunks are one element longer than the rest. Task `group`
# (1-based) is made sticky and assigned thread id group-1 through the runtime's
# jl_set_task_tid entry point.
# NOTE(review): this presumably requires n <= Threads.nthreads(); the only caller
# in this file passes exactly Threads.nthreads().
function static_par_ranged(f::Function, range::Int, n::Int)
  base_width = range ÷ n
  extra = range % n
  # exactly one (offset, width) pair per group: 0-based i, 1-based group = i + 1
  chunks = map(0:n-1) do i
    width = base_width + (i < extra ? 1 : 0)
    offset = i < extra ? (base_width + 1) * i : ((base_width + 1) * extra) + (base_width * (i - extra))
    (offset, width)
  end
  ccall(:jl_enter_threaded_region, Cvoid, ())
  try
    foreach(wait, map(1:n) do group
      (offset, width) = chunks[group]
      task = Task(() -> f(group, offset + 1, offset + width))
      task.sticky = true
      ccall(:jl_set_task_tid, Cvoid, (Any, Cint), task, group - 1) # ccall, so 0-based for group
      schedule(task)
    end)
  finally
    ccall(:jl_exit_threaded_region, Cvoid, ())
  end
end
|
||||
|
||||
# Dot product of a and b: one chunk per thread via static_par_ranged, each chunk
# accumulating into its own slot of `partial`, followed by a sum over the slots.
function dot(data::VectorData{T}, _) where {T}
  nchunks = Threads.nthreads()
  partial = Vector{T}(undef, nchunks)
  static_par_ranged(data.size, nchunks) do group, first_idx, last_idx
    chunk_sum = zero(T)
    @simd for idx = first_idx:last_idx
      @inbounds chunk_sum += data.a[idx] * data.b[idx]
    end
    @inbounds partial[group] = chunk_sum
  end
  return sum(partial)
  # The alternative below doesn't do well on aarch64 because of the excessive
  # Threads.threadid() ccall and inhibited vectorisation from the lack of @simd
  # partial = zeros(T, Threads.nthreads())
  # Threads.@threads for i = 1:data.size
  #   @inbounds partial[Threads.threadid()] += (data.a[i] * data.b[i])
  # end
  # return sum(partial)
end
|
||||
|
||||
# Returns the given stream data unchanged.
function read_data(data::VectorData{T}, _)::VectorData{T} where {T}
  data
end
|
||||
|
||||
main()
|
||||
170
JuliaStream.jl/src/oneAPIStream.jl
Normal file
170
JuliaStream.jl/src/oneAPIStream.jl
Normal file
@ -0,0 +1,170 @@
|
||||
using Base.Iterators: println
|
||||
using Base.Iterators: println
|
||||
using Printf: Iterators
|
||||
|
||||
include("Stream.jl")
|
||||
using oneAPI
|
||||
|
||||
const oneData = StreamData{T,oneArray{T}} where {T}
|
||||
const DotWGSize = 256::Int
|
||||
|
||||
# Collects the devices of every oneL0 driver and pairs each with its plain-text
# description and the backend label.
function devices()::Vector{DeviceWithRepr}
  found = Iterators.collect(Iterators.flatten(map(oneL0.devices, oneL0.drivers())))
  map(found) do dev
    (dev, repr("text/plain", dev), "oneAPi.jl")
  end
end
|
||||
|
||||
# Selects the requested device, derives the work-group size and allocates the three
# device arrays. Returns the stream data together with the chosen work-group size.
function make_stream(
  arraysize::Int,
  scalar::T,
  device::DeviceWithRepr,
  silent::Bool,
)::Tuple{oneData{T},Int} where {T}

  oneAPI.allowscalar(false)
  oneAPI.device!(device[1])

  # The work-group size is capped by the device limit and by the array length.
  maxgroup = oneL0.compute_properties(oneAPI.device()).maxTotalGroupSize
  groupsize = min(maxgroup, arraysize)

  arraysize % groupsize == 0 ||
    error("arraysize ($(arraysize)) must be divisible by $(groupsize)!")

  if !silent
    println("Using L0 device: $(repr("text/plain",device[1]))")
    println("Kernel parameters   : <<<$(arraysize),$(groupsize)>>>")
  end

  alloc() = oneArray{T}(undef, arraysize)
  stream = oneData{T}(alloc(), alloc(), alloc(), scalar, arraysize)
  return (stream, groupsize)
end
|
||||
|
||||
# Fills a, b and c with the first, second and third element of `init` respectively.
function init_arrays!(data::oneData{T}, _, init::Tuple{T,T,T}) where {T}
  (va, vb, vc) = init
  oneAPI.fill!(data.a, va)
  oneAPI.fill!(data.b, vb)
  oneAPI.fill!(data.c, vc)
end
|
||||
|
||||
# Copy kernel: c[i] = a[i]. Launched as data.size ÷ groupsize groups of groupsize items,
# then synchronised.
function copy!(data::oneData{T}, groupsize::Int) where {T}
  function copy_kernel(src::AbstractArray{T}, dst::AbstractArray{T})
    gid = get_global_id()
    @inbounds dst[gid] = src[gid]
    return
  end
  ngroups = data.size ÷ groupsize
  @oneapi items = groupsize groups = ngroups copy_kernel(data.a, data.c)
  oneAPI.synchronize()
end
|
||||
|
||||
# Mul kernel: b[i] = scalar * c[i]. Launched as data.size ÷ groupsize groups of groupsize
# items, then synchronised.
function mul!(data::oneData{T}, groupsize::Int) where {T}
  function mul_kernel(dst::AbstractArray{T}, src::AbstractArray{T}, factor::T)
    gid = get_global_id()
    @inbounds dst[gid] = factor * src[gid]
    return
  end
  ngroups = data.size ÷ groupsize
  @oneapi items = groupsize groups = ngroups mul_kernel(data.b, data.c, data.scalar)
  oneAPI.synchronize()
end
|
||||
|
||||
# Add kernel: c[i] = a[i] + b[i]. Launched as data.size ÷ groupsize groups of groupsize
# items, then synchronised.
function add!(data::oneData{T}, groupsize::Int) where {T}
  function add_kernel(lhs::AbstractArray{T}, rhs::AbstractArray{T}, dst::AbstractArray{T})
    gid = get_global_id()
    @inbounds dst[gid] = lhs[gid] + rhs[gid]
    return
  end
  ngroups = data.size ÷ groupsize
  @oneapi items = groupsize groups = ngroups add_kernel(data.a, data.b, data.c)
  oneAPI.synchronize()
end
|
||||
|
||||
# Triad kernel: a[i] = b[i] + scalar * c[i]. Launched as data.size ÷ groupsize groups of
# groupsize items, then synchronised.
function triad!(data::oneData{T}, groupsize::Int) where {T}
  function triad_kernel(
    dst::AbstractArray{T},
    addend::AbstractArray{T},
    scaled::AbstractArray{T},
    factor::T,
  )
    gid = get_global_id()
    @inbounds dst[gid] = addend[gid] + (factor * scaled[gid])
    return
  end
  ngroups = data.size ÷ groupsize
  @oneapi items = groupsize groups = ngroups triad_kernel(data.a, data.b, data.c, data.scalar)
  oneAPI.synchronize()
end
|
||||
|
||||
# Nstream kernel: a[i] += b[i] + scalar * c[i]. Launched as data.size ÷ groupsize groups of
# groupsize items, then synchronised.
function nstream!(data::oneData{T}, groupsize::Int) where {T}
  function nstream_kernel(
    acc::AbstractArray{T},
    addend::AbstractArray{T},
    scaled::AbstractArray{T},
    factor::T,
  )
    gid = get_global_id()
    @inbounds acc[gid] = acc[gid] + (addend[gid] + factor * scaled[gid])
    return
  end
  ngroups = data.size ÷ groupsize
  @oneapi items = groupsize groups = ngroups nstream_kernel(data.a, data.b, data.c, data.scalar)
  oneAPI.synchronize()
end
|
||||
|
||||
# Dot product of a and b on the device.
#
# Each work-item accumulates a strided partial product sum into its slot of the work-group
# local buffer, the work-group tree-reduces that buffer, and the first work-item of every
# group writes the group total to `partial`. The per-group totals are summed at the end.
#
# Launch geometry: the local buffer has a fixed DotWGSize slots indexed by local id, so the
# work-group size is capped at DotWGSize; `partial` has exactly one slot per launched group,
# so every slot that is summed has been written by the kernel.
# NOTE(review): the tree reduction halves get_local_size() repeatedly, so it presumably
# expects a power-of-two work-group size — confirm for small, non-power-of-two arraysize.
function dot(data::oneData{T}, groupsize::Int) where {T}
  function kernel(a::AbstractArray{T}, b::AbstractArray{T}, size::Int, partial::AbstractArray{T})
    wg_sum = @LocalMemory(T, (DotWGSize,))
    li = get_local_id()
    @inbounds wg_sum[li] = zero(T)

    # do dot first
    i = get_global_id()
    while i <= size
      @inbounds wg_sum[li] += a[i] * b[i]
      i += get_global_size()
    end

    # then tree reduction
    offset = get_local_size() ÷ 2
    while offset > 0
      barrier()
      if li <= offset
        @inbounds wg_sum[li] += wg_sum[li+offset]
      end
      offset ÷= 2
    end

    if li == 1
      @inbounds partial[get_group_id()] = wg_sum[li]
    end

    return
  end
  wgitems = min(DotWGSize, groupsize) # never more items than local-memory slots
  wgcount = DotWGSize                 # number of groups, one partial sum each
  partial_sum = oneArray{T}(undef, wgcount)
  @oneapi items = wgitems groups = wgcount kernel(
    data.a,
    data.b,
    data.size,
    partial_sum,
  )
  oneAPI.synchronize()
  return sum(partial_sum)
end
|
||||
|
||||
# Builds a VectorData from the a, b and c arrays, the scalar and the size of the stream data.
function read_data(data::oneData{T}, _)::VectorData{T} where {T}
  host = VectorData{T}(data.a, data.b, data.c, data.scalar, data.size)
  return host
end
|
||||
|
||||
main()
|
||||
7
JuliaStream.jl/update_all.sh
Executable file
7
JuliaStream.jl/update_all.sh
Executable file
@ -0,0 +1,7 @@
|
||||
#!/bin/bash
|
||||
# shellcheck disable=SC2034 disable=SC2153
|
||||
|
||||
# Resolve, instantiate, update and garbage-collect the Julia environment of each project.
BACKENDS=("." "AMDGPU" "CUDA" "oneAPI" "Threaded" "KernelAbstractions")
for BACKEND in "${BACKENDS[@]}"; do
  julia --project="$BACKEND" -e 'import Pkg; Pkg.resolve(); Pkg.instantiate(); Pkg.update(); Pkg.gc();'
done
|
||||
@ -23,6 +23,8 @@ Currently implemented are:
|
||||
|
||||
This code was previously called GPU-STREAM.
|
||||
|
||||
This project also contains implementations in alternative languages with different build systems:
|
||||
* Scala - [scala-stream](./scala-stream)
|
||||
|
||||
How is this different to STREAM?
|
||||
--------------------------------
|
||||
@ -159,10 +161,12 @@ Citing
|
||||
|
||||
Please cite BabelStream via this reference:
|
||||
|
||||
> Deakin T, Price J, Martineau M, McIntosh-Smith S. GPU-STREAM v2.0: Benchmarking the achievable memory bandwidth of many-core processors across diverse parallel programming models. 2016. Paper presented at P^3MA Workshop at ISC High Performance, Frankfurt, Germany.
|
||||
> Deakin T, Price J, Martineau M, McIntosh-Smith S. GPU-STREAM v2.0: Benchmarking the achievable memory bandwidth of many-core processors across diverse parallel programming models. 2016. Paper presented at P^3MA Workshop at ISC High Performance, Frankfurt, Germany. DOI: 10.1007/978-3-319-46079-6_34
|
||||
|
||||
**Other BabelStream publications:**
|
||||
|
||||
> Deakin T, Price J, Martineau M, McIntosh-Smith S. Evaluating attainable memory bandwidth of parallel programming models via BabelStream. International Journal of Computational Science and Engineering. Special issue. Vol. 17, No. 3, pp. 247–262. 2018. DOI: 10.1504/IJCSE.2018.095847
|
||||
|
||||
> Deakin T, McIntosh-Smith S. GPU-STREAM: Benchmarking the achievable memory bandwidth of Graphics Processing Units. 2015. Poster session presented at IEEE/ACM SuperComputing, Austin, United States.
|
||||
You can view the [Poster and Extended Abstract](http://sc15.supercomputing.org/sites/all/themes/SC15images/tech_poster/tech_poster_pages/post150.html).
|
||||
|
||||
@ -171,8 +175,6 @@ You can view the [Poster and Extended Abstract](http://sc16.supercomputing.org/s
|
||||
|
||||
> Raman K, Deakin T, Price J, McIntosh-Smith S. Improving achieved memory bandwidth from C++ codes on Intel Xeon Phi Processor (Knights Landing). IXPUG Spring Meeting, Cambridge, UK, 2017.
|
||||
|
||||
> Deakin T, Price J, Martineau M, McIntosh-Smith S. Evaluating attainable memory bandwidth of parallel programming models via BabelStream. International Journal of Computational Science and Engineering. Special issue (in press). 2017.
|
||||
|
||||
> Deakin T, Price J, McIntosh-Smith S. Portable methods for measuring cache hierarchy performance. 2017. Poster sessions presented at IEEE/ACM SuperComputing, Denver, United States.
|
||||
You can view the [Poster and Extended Abstract](http://sc17.supercomputing.org/SC17%20Archive/tech_poster/tech_poster_pages/post155.html)
|
||||
|
||||
|
||||
1
scala-stream/.bsp/sbt.json
Normal file
1
scala-stream/.bsp/sbt.json
Normal file
@ -0,0 +1 @@
|
||||
{"name":"sbt","version":"1.5.2","bspVersion":"2.0.0-M5","languages":["scala"],"argv":["/usr/lib/jvm/java-11-openjdk-11.0.11.0.9-2.fc33.x86_64/bin/java","-Xms100m","-Xmx100m","-classpath","/home/tom/.local/share/JetBrains/Toolbox/apps/IDEA-U/ch-0/211.7142.45.plugins/Scala/launcher/sbt-launch.jar","xsbt.boot.Boot","-bsp","--sbt-launch-jar=/home/tom/.local/share/JetBrains/Toolbox/apps/IDEA-U/ch-0/211.7142.45.plugins/Scala/launcher/sbt-launch.jar"]}
|
||||
1
scala-stream/.gitignore
vendored
Normal file
1
scala-stream/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
target/
|
||||
2
scala-stream/.jvmopts
Normal file
2
scala-stream/.jvmopts
Normal file
@ -0,0 +1,2 @@
|
||||
-Xmx4096m
|
||||
-Xss4m
|
||||
34
scala-stream/.scalafmt.conf
Normal file
34
scala-stream/.scalafmt.conf
Normal file
@ -0,0 +1,34 @@
|
||||
version = "3.0.0-RC2"
|
||||
runner.dialect = scala3
|
||||
|
||||
style = defaultWithAlign
|
||||
|
||||
maxColumn = 100
|
||||
|
||||
align.preset = more
|
||||
|
||||
rewrite.rules = [
|
||||
AvoidInfix
|
||||
RedundantBraces
|
||||
RedundantParens
|
||||
AsciiSortImports
|
||||
PreferCurlyFors
|
||||
]
|
||||
|
||||
rewrite.neverInfix.excludeFilters = [until
|
||||
to
|
||||
by
|
||||
eq
|
||||
ne
|
||||
"should.*"
|
||||
"contain.*"
|
||||
"must.*"
|
||||
in
|
||||
be
|
||||
taggedAs
|
||||
thrownBy
|
||||
synchronized
|
||||
have
|
||||
when
|
||||
size
|
||||
theSameElementsAs]
|
||||
102
scala-stream/README.md
Normal file
102
scala-stream/README.md
Normal file
@ -0,0 +1,102 @@
|
||||
ScalaStream
|
||||
===========
|
||||
|
||||
This is an implementation of BabelStream
|
||||
in [Scala 3](https://docs.scala-lang.org/scala3/new-in-scala3.html) on the JVM. In theory, this
|
||||
implementation also covers Java. Scala and Java, like any other programming language, have their own
|
||||
ecosystem of library-supported parallel programming frameworks; we currently implement the
|
||||
following:
|
||||
|
||||
* Parallel streams (introduced in Java 8) - `src/main/scala/scalastream/J8SStream.scala`
|
||||
* [Scala Parallel Collections](https://github.com/scala/scala-parallel-collections)
|
||||
- `src/main/scala/scalastream/ParStream.scala`
|
||||
|
||||
As the benchmark is relatively simple, we also implement some baselines:
|
||||
|
||||
* Single threaded Scala `for` (i.e. `foreach` sugar) - `src/main/scala/scalastream/PlainStream.scala`
|
||||
* Manual parallelism with Java executors - `src/main/scala/scalastream/ThreadedStream.scala`
|
||||
|
||||
### Performance considerations
|
||||
|
||||
As Scala 3 defaults to Scala 2.13's standard library, we roll our own `Fractional` typeclass with
|
||||
liberal use of inlining and specialisation. This is motivated by 2.13 stdlib's lack of
|
||||
specialisation for primitive types on the default `Fractional` and `Numeric` typeclasses.
|
||||
|
||||
The use of [Spire](https://github.com/typelevel/spire) to mitigate this was attempted, however, due
|
||||
to its use of Scala 2 macros, it currently doesn't compile with Scala 3.
|
||||
|
||||
### Build & Run
|
||||
|
||||
Prerequisites
|
||||
|
||||
* JDK >= 8 on any of its supported platforms; known working implementations:
|
||||
- OpenJDK
|
||||
distributions ([Amazon Corretto](https://docs.aws.amazon.com/corretto/latest/corretto-11-ug/downloads-list.html)
|
||||
, [Azul](https://www.azul.com/downloads/?version=java-11-lts&package=jdk)
|
||||
, [AdoptOpenJDK](https://adoptopenjdk.net/), etc)
|
||||
- Oracle Graal CE/EE 8+
|
||||
|
||||
To run the benchmark, first create a binary:
|
||||
|
||||
```shell
|
||||
> ./sbt assembly
|
||||
```
|
||||
|
||||
The binary will be located at `./target/scala-3.0.0/scala-stream.jar`. Run it with:
|
||||
|
||||
```shell
|
||||
> java -version
|
||||
openjdk version "11.0.11" 2021-04-20
|
||||
OpenJDK Runtime Environment 18.9 (build 11.0.11+9)
|
||||
OpenJDK 64-Bit Server VM 18.9 (build 11.0.11+9, mixed mode, sharing)
|
||||
> java -jar target/scala-3.0.0/scala-stream.jar --help
|
||||
|
||||
```
|
||||
|
||||
For best results, benchmark with the following JVM flags:
|
||||
|
||||
```
|
||||
-XX:-UseOnStackReplacement # disable OSR, not useful for this benchmark as we are measuring peak performance
|
||||
-XX:-TieredCompilation # disable C1, go straight to C2
|
||||
-XX:ReservedCodeCacheSize=512m # don't flush compiled code out of cache at any point
|
||||
```
|
||||
|
||||
Worked example:
|
||||
|
||||
```shell
|
||||
> java -XX:-UseOnStackReplacement -XX:-TieredCompilation -XX:ReservedCodeCacheSize=512m -jar target/scala-3.0.0/scala-stream.jar
|
||||
|
||||
BabelStream
|
||||
Version: 3.4.0
|
||||
Implementation: Scala Parallel Collections; Scala (Java 11.0.11; Red Hat, Inc.; home=/usr/lib/jvm/java-11-openjdk-11.0.11.0.9-2.fc33.x86_64)
|
||||
Running kernels 100 times
|
||||
Precision: double
|
||||
Array size: 268.4 MB (=0.3 GB)
|
||||
Total size: 805.3 MB (=0.8 GB)
|
||||
Function MBytes/sec Min (sec) Max Average
|
||||
Copy 4087.077 0.13136 0.24896 0.15480
|
||||
Mul 2934.709 0.18294 0.28706 0.21627
|
||||
Add 3016.342 0.26698 0.39835 0.31119
|
||||
Triad 3016.496 0.26697 0.37612 0.31040
|
||||
Dot 2216.096 0.24226 0.41235 0.28264
|
||||
|
||||
```
|
||||
|
||||
### Graal Native Image
|
||||
|
||||
The port has partial support for Graal Native Image, to generate one, run:
|
||||
|
||||
```shell
|
||||
> ./sbt nativeImage
|
||||
```
|
||||
|
||||
The ELF binary will be located at `./target/native-image/scala-stream`, relocation should work on
|
||||
the same architecture the binary is built on.
|
||||
|
||||
There's an ongoing bug with Scala 3's use of `lazy val`s where the program crashes at declaration
|
||||
site. Currently, Scala Parallel Collections uses this feature internally, so selecting this device
|
||||
will crash at runtime.
|
||||
|
||||
The bug originates from the use of `Unsafe` in `lazy val` for thread safety guarantees. It seems
|
||||
that Graal only supports limited uses of this JVM implementation detail and Scala 3 happens to be on
|
||||
the unsupported side.
|
||||
29
scala-stream/build.sbt
Normal file
29
scala-stream/build.sbt
Normal file
@ -0,0 +1,29 @@
|
||||
// Fully qualified entry point, shared by the Compile and assembly main-class settings.
lazy val mainCls = Some("scalastream.App")

lazy val root = project
  .in(file("."))
  .enablePlugins(NativeImagePlugin)
  .settings(
    // Project identity
    name             := "scala-stream",
    version          := "3.4.0",
    organization     := "uk.ac.bristol.uob-hpc",
    organizationName := "University of Bristol",
    // Compiler and entry point
    scalaVersion := "3.0.0",
    scalacOptions ~= filterConsoleScalacOptions,
    Compile / mainClass        := mainCls,
    assembly / mainClass       := mainCls,
    assembly / assemblyJarName := "scala-stream.jar",
    // Native image
    nativeImageVersion := "21.1.0",
    nativeImageOptions := Seq(
      "--no-fallback",
      "-H:ReflectionConfigurationFiles=../../reflect-config.json"
    ),
    (Global / excludeLintKeys) += nativeImageVersion,
    libraryDependencies ++= Seq(
      // Lazy val implementation in Scala 3 triggers an exception in nativeImage, use 2_13 for arg parsing for now otherwise we can't get to the benchmarking part
      ("com.github.scopt" %% "scopt" % "4.0.1").cross(CrossVersion.for3Use2_13),
      // par also uses lazy val at some point, so it doesn't work in nativeImage
      "org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.3",
      "net.openhft" % "affinity"     % "3.21ea1",
      "org.slf4j"   % "slf4j-simple" % "1.7.30" // for affinity
    )
  )
|
||||
1
scala-stream/project/build.properties
Normal file
1
scala-stream/project/build.properties
Normal file
@ -0,0 +1 @@
|
||||
sbt.version=1.5.2
|
||||
6
scala-stream/project/plugins.sbt
Normal file
6
scala-stream/project/plugins.sbt
Normal file
@ -0,0 +1,6 @@
|
||||
addSbtPlugin("com.timushev.sbt" % "sbt-updates" % "0.5.3")
|
||||
addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.1.17")
|
||||
addSbtPlugin("org.scalameta" % "sbt-native-image" % "0.3.0")
|
||||
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.15.0")
|
||||
addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.9.27")
|
||||
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.2")
|
||||
11
scala-stream/reflect-config.json
Normal file
11
scala-stream/reflect-config.json
Normal file
@ -0,0 +1,11 @@
|
||||
[
|
||||
{
|
||||
"name": "sun.misc.Unsafe",
|
||||
"fields": [
|
||||
{
|
||||
"name": "theUnsafe",
|
||||
"allowUnsafeAccess": true
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
3
scala-stream/sbt
Executable file
3
scala-stream/sbt
Executable file
@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Locate the bundled launcher relative to this script rather than the working directory,
# so the wrapper can also be started from outside its own directory.
"$(dirname "$0")/sbt-dist/bin/sbt" "$@"
|
||||
BIN
scala-stream/sbt-dist/bin/java9-rt-export.jar
Normal file
BIN
scala-stream/sbt-dist/bin/java9-rt-export.jar
Normal file
Binary file not shown.
177
scala-stream/sbt-dist/bin/sbt
Executable file
177
scala-stream/sbt-dist/bin/sbt
Executable file
@ -0,0 +1,177 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
|
||||
### ------------------------------- ###
|
||||
### Helper methods for BASH scripts ###
|
||||
### ------------------------------- ###
|
||||
|
||||
# Prints the physical absolute path of $1, following at most 100 symlink hops.
# When $2 is non-empty the result is passed through cygwinpath.
# Runs in a subshell, so the directory changes and variables do not leak to the caller.
realpath () {
(
  target="$1"
  fix_cygpath="$2"

  cd "$(dirname "$target")"
  target=$(basename "$target")

  hops=0
  while [[ -L "$target" && $hops -lt 100 ]]; do
    target=$(readlink "$target")
    cd "$(dirname "$target")"
    target=$(basename "$target")
    hops=$((hops + 1))
  done

  # make sure we grab the actual windows path, instead of cygwin's path.
  if [[ -n "$fix_cygpath" ]]; then
    echo "$(cygwinpath "$(pwd -P)/$target")"
  else
    echo "$(pwd -P)/$target"
  fi
)
}
|
||||
|
||||
|
||||
# Uses uname to detect if we're in the odd cygwin environment.
# Succeeds (status 0) when `uname -s` starts with CYGWIN, MINGW or MSYS; fails otherwise.
is_cygwin() {
  case "$(uname -s)" in
    CYGWIN*|MINGW*|MSYS*) return 0 ;;
    *) return 1 ;;
  esac
}
|
||||
|
||||
# TODO - Use nicer bash-isms here.
|
||||
CYGWIN_FLAG=$(if is_cygwin; then echo true; else echo false; fi)
|
||||
|
||||
|
||||
# This can fix cygwin style /cygdrive paths so we get the
# windows style paths.
# Prints $1 converted with `cygpath -w` when CYGWIN_FLAG is "true", otherwise prints $1
# unchanged. The path is quoted everywhere so that spaces and glob characters in it
# survive intact.
cygwinpath() {
  local file="$1"
  if [[ "$CYGWIN_FLAG" == "true" ]]; then
    echo "$(cygpath -w "$file")"
  else
    echo "$file"
  fi
}
|
||||
|
||||
. "$(dirname "$(realpath "$0")")/sbt-launch-lib.bash"
|
||||
|
||||
|
||||
declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy"
|
||||
declare -r sbt_opts_file=".sbtopts"
|
||||
declare -r etc_sbt_opts_file="/etc/sbt/sbtopts"
|
||||
declare -r dist_sbt_opts_file="${sbt_home}/conf/sbtopts"
|
||||
declare -r win_sbt_opts_file="${sbt_home}/conf/sbtconfig.txt"
|
||||
|
||||
# Prints the launcher help text to stdout.
# The heredoc is unquoted, so `basename "$0"`, $sbt_default_mem, $(get_mem_opts),
# $(java -version ...), $java_opts and $default_sbt_opts are expanded when it is printed.
# NOTE(review): -sbt-rc, -sbt-snapshot and -S-X are listed here but have no matching case
# in process_args / process_my_args as visible in these scripts — confirm they are handled.
usage() {
 cat <<EOM
Usage: `basename "$0"` [options]

-h | -help print this message
-v | -verbose this runner is chattier
-d | -debug set sbt log level to debug
-no-colors disable ANSI color codes
-sbt-create start sbt even if current directory contains no sbt project
-sbt-dir <path> path to global settings/plugins directory (default: ~/.sbt)
-sbt-boot <path> path to shared boot directory (default: ~/.sbt/boot in 0.11 series)
-ivy <path> path to local Ivy repository (default: ~/.ivy2)
-mem <integer> set memory options (default: $sbt_default_mem, which is $(get_mem_opts))
-no-share use all local caches; no sharing
-no-global uses global caches, but does not use global ~/.sbt directory.
-jvm-debug <port> Turn on JVM debugging, open at the given port.
-batch Disable interactive mode

# sbt version (default: from project/build.properties if present, else latest release)
-sbt-version <version> use the specified version of sbt
-sbt-jar <path> use the specified jar as the sbt launcher
-sbt-rc use an RC version of sbt
-sbt-snapshot use a snapshot version of sbt

# java version (default: java from PATH, currently $(java -version 2>&1 | grep version))
-java-home <path> alternate JAVA_HOME

# jvm options and output control
JAVA_OPTS environment variable, if unset uses "$java_opts"
.jvmopts if this file exists in the current directory, its contents
are appended to JAVA_OPTS
SBT_OPTS environment variable, if unset uses "$default_sbt_opts"
.sbtopts if this file exists in the current directory, its contents
are prepended to the runner args
/etc/sbt/sbtopts if this file exists, it is prepended to the runner args
-Dkey=val pass -Dkey=val directly to the java runtime
-J-X pass option -X directly to the java runtime
(-J is stripped)
-S-X add -X to sbt's scalacOptions (-S is stripped)

In the case of duplicated or conflicting options, the order above
shows precedence: JAVA_OPTS lowest, command line options highest.
EOM
}
|
||||
|
||||
|
||||
|
||||
# Handles the options specific to this runner script. Recognised options are turned into
# java arguments (addJava) or flags (sbt_create, sbt_new); everything else is kept as a
# residual argument. Afterwards the sbt version property is added if sbt_version is set,
# and the user is asked to confirm when the current directory does not look like an sbt
# project.
process_my_args () {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;;
      -no-share) addJava "$noshare_opts" && shift ;;
      -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;;
      -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;;
      -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;;
      -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;;
      # redirect stdin from /dev/null for the rest of the script
      -batch) exec </dev/null && shift ;;

      -sbt-create) sbt_create=true && shift ;;

      # "new" is both recorded as a flag and passed on as a residual argument
      new) sbt_new=true && addResidual "$1" && shift ;;

      *) addResidual "$1" && shift ;;
    esac
  done

  # Now, ensure sbt version is used.
  [[ "${sbt_version}XXX" != "XXX" ]] && addJava "-Dsbt.version=$sbt_version"

  # Confirm a user's intent if the current directory does not look like an sbt
  # top-level directory and neither the -sbt-create option nor the "new"
  # command was given.
  [[ -f ./build.sbt || -d ./project || -n "$sbt_create" || -n "$sbt_new" ]] || {
    echo "[warn] Neither build.sbt nor a 'project' directory in the current directory: $(pwd)"
    while true; do
      echo 'c) continue'
      echo 'q) quit'

      # a failed read (e.g. end of input) aborts the script
      read -p '? ' || exit 1
      case "$REPLY" in
        c|C) break ;;
        q|Q) exit 1 ;;
      esac
    done
  }
}
|
||||
|
||||
# Prints the lines of config file $1, skipping lines that start with '#' and stripping a
# trailing carriage return. Each line is passed through `eval echo`, so shell expansions
# inside the file are performed.
loadConfigFile() {
  local line
  # Make sure the last line is read even if it doesn't have a terminating \n
  sed $'/^\#/d;s/\r$//' "$1" | while read -r line || [[ -n "$line" ]]; do
    eval echo $line
  done
}
|
||||
|
||||
# Here we pull in the default settings configuration.
|
||||
[[ -f "$dist_sbt_opts_file" ]] && set -- $(loadConfigFile "$dist_sbt_opts_file") "$@"
|
||||
|
||||
# Here we pull in the global settings configuration.
|
||||
[[ -f "$etc_sbt_opts_file" ]] && set -- $(loadConfigFile "$etc_sbt_opts_file") "$@"
|
||||
|
||||
# Pull in the project-level config file, if it exists.
|
||||
[[ -f "$sbt_opts_file" ]] && set -- $(loadConfigFile "$sbt_opts_file") "$@"
|
||||
|
||||
# Pull in the project-level java config, if it exists.
|
||||
[[ -f ".jvmopts" ]] && export JAVA_OPTS="$JAVA_OPTS $(loadConfigFile .jvmopts)"
|
||||
|
||||
run "$@"
|
||||
|
||||
363
scala-stream/sbt-dist/bin/sbt-launch-lib.bash
Normal file
363
scala-stream/sbt-dist/bin/sbt-launch-lib.bash
Normal file
@ -0,0 +1,363 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
|
||||
# A library to simplify using the SBT launcher from other packages.
|
||||
# Note: This should be used by tools like giter8/conscript etc.
|
||||
|
||||
# TODO - Should we merge the main SBT script with this library?
|
||||
|
||||
declare -a residual_args
|
||||
declare -a java_args
|
||||
declare -a scalac_args
|
||||
declare -a sbt_commands
|
||||
declare java_cmd=java
|
||||
declare java_version
|
||||
declare init_sbt_version="1.2.8"
|
||||
declare sbt_default_mem=1024
|
||||
|
||||
declare SCRIPT=$0
|
||||
while [ -h "$SCRIPT" ] ; do
|
||||
ls=$(ls -ld "$SCRIPT")
|
||||
# Drop everything prior to ->
|
||||
link=$(expr "$ls" : '.*-> \(.*\)$')
|
||||
if expr "$link" : '/.*' > /dev/null; then
|
||||
SCRIPT="$link"
|
||||
else
|
||||
SCRIPT=$(dirname "$SCRIPT")/"$link"
|
||||
fi
|
||||
done
|
||||
declare -r sbt_bin_dir="$(dirname "$SCRIPT")"
|
||||
declare -r sbt_home="$(dirname "$sbt_bin_dir")"
|
||||
|
||||
# Prints its arguments to stderr.
echoerr () {
  echo "$@" >&2
}
|
||||
# Prints its arguments to stderr when $verbose or $debug is non-empty.
vlog () {
  [[ -n $verbose || -n $debug ]] && echoerr "$@"
}
|
||||
# Prints its arguments to stderr when $debug is non-empty.
dlog () {
  [[ -n $debug ]] && echoerr "$@"
}
|
||||
|
||||
# Prints the path of the launcher jar under $sbt_home/bin, passed through cygwinpath.
jar_file () {
  local launcher="${sbt_home}/bin/sbt-launch.jar"
  echo "$(cygwinpath "$launcher")"
}
|
||||
|
||||
# Stores the launcher jar path in $sbt_jar; exits with status 2 if that file does not exist.
acquire_sbt_jar () {
  sbt_jar="$(jar_file)"

  if [[ -f "$sbt_jar" ]]; then
    return 0
  fi
  echoerr "Could not find launcher jar: $sbt_jar"
  exit 2
}
|
||||
|
||||
# Prints the path of java9-rt-export.jar inside $sbt_bin_dir.
rt_export_file () {
  local exporter="${sbt_bin_dir}/java9-rt-export.jar"
  echo "$exporter"
}
|
||||
|
||||
# Runs the command given as arguments. When $verbose or $debug is set, the command line
# is first echoed to stdout, one argument per line.
execRunner () {
  # print the arguments one to a line, quoting any containing spaces
  [[ $verbose || $debug ]] && echo "# Executing command line:" && {
    local arg
    for arg in "$@"; do
      case "$arg" in
        *" "*) printf "\"%s\"\n" "$arg" ;;
        *) printf "%s\n" "$arg" ;;
      esac
    done
    echo ""
  }

  # THis used to be exec, but we loose the ability to re-hook stty then
  # for cygwin...  Maybe we should flag the feature here...
  "$@"
}
|
||||
|
||||
# Appends $1 to the java_args array (logged via dlog).
addJava () {
  dlog "[addJava] arg = '$1'"
  java_args+=("$1")
}
|
||||
# Appends $1 to the sbt_commands array (logged via dlog).
addSbt () {
  dlog "[addSbt] arg = '$1'"
  sbt_commands+=("$1")
}
|
||||
# Appends $1 to the residual_args array (logged via dlog).
addResidual () {
  dlog "[residual] arg = '$1'"
  residual_args+=("$1")
}
|
||||
# Adds a JDWP agent option that listens on the address given in $1 (server mode, no suspend).
addDebugger () {
  local address="$1"
  addJava "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=${address}"
}
|
||||
|
||||
# Prints the default JVM memory options for a heap of $1 MB (default: $sbt_default_mem).
# Prints an empty line instead when JAVA_OPTS, JAVA_TOOL_OPTIONS or SBT_OPTS already
# contain a heap, permgen/metaspace or code-cache setting. Individual options that are
# already present in java_args are left out of the output.
get_mem_opts () {
  # if we detect any of these settings in ${JAVA_OPTS} or ${JAVA_TOOL_OPTIONS} we need to NOT output our settings.
  # The reason is the Xms/Xmx, if they don't line up, cause errors.
  if [[ "${JAVA_OPTS}" == *-Xmx* ]] || [[ "${JAVA_OPTS}" == *-Xms* ]] || [[ "${JAVA_OPTS}" == *-XX:MaxPermSize* ]] || [[ "${JAVA_OPTS}" == *-XX:MaxMetaspaceSize* ]] || [[ "${JAVA_OPTS}" == *-XX:ReservedCodeCacheSize* ]]; then
    echo ""
  elif [[ "${JAVA_TOOL_OPTIONS}" == *-Xmx* ]] || [[ "${JAVA_TOOL_OPTIONS}" == *-Xms* ]] || [[ "${JAVA_TOOL_OPTIONS}" == *-XX:MaxPermSize* ]] || [[ "${JAVA_TOOL_OPTIONS}" == *-XX:MaxMetaspaceSize* ]] || [[ "${JAVA_TOOL_OPTIONS}" == *-XX:ReservedCodeCacheSize* ]]; then
    echo ""
  elif [[ "${SBT_OPTS}" == *-Xmx* ]] || [[ "${SBT_OPTS}" == *-Xms* ]] || [[ "${SBT_OPTS}" == *-XX:MaxPermSize* ]] || [[ "${SBT_OPTS}" == *-XX:MaxMetaspaceSize* ]] || [[ "${SBT_OPTS}" == *-XX:ReservedCodeCacheSize* ]]; then
    echo ""
  else
    # a ham-fisted attempt to move some memory settings in concert
    # so they need not be messed around with individually.
    local mem=${1:-$sbt_default_mem}
    # code cache is an eighth of the heap, clamped to the range 128..512 (MB)
    local codecache=$(( $mem / 8 ))
    (( $codecache > 128 )) || codecache=128
    (( $codecache < 512 )) || codecache=512
    local class_metadata_size=$(( $codecache * 2 ))
    if [[ -z $java_version ]]; then
      java_version=$(jdk_version)
    fi
    # MaxPermSize is used for Java versions below 8, MaxMetaspaceSize otherwise
    local class_metadata_opt=$((( $java_version < 8 )) && echo "MaxPermSize" || echo "MaxMetaspaceSize")

    local arg_xms=$([[ "${java_args[@]}" == *-Xms* ]] && echo "" || echo "-Xms${mem}m")
    local arg_xmx=$([[ "${java_args[@]}" == *-Xmx* ]] && echo "" || echo "-Xmx${mem}m")
    local arg_rccs=$([[ "${java_args[@]}" == *-XX:ReservedCodeCacheSize* ]] && echo "" || echo "-XX:ReservedCodeCacheSize=${codecache}m")
    local arg_meta=$([[ "${java_args[@]}" == *-XX:${class_metadata_opt}* && ! (( $java_version < 8 )) ]] && echo "" || echo "-XX:${class_metadata_opt}=${class_metadata_size}m")

    echo "${arg_xms} ${arg_xmx} ${arg_rccs} ${arg_meta}"
  fi
}
|
||||
|
||||
# Prints the garbage-collector option to pass to the JVM: "-XX:+UseParallelGC" when
# $java_version is 9 or newer and no "Use...GC" option is present in JAVA_OPTS,
# JAVA_TOOL_OPTIONS or SBT_OPTS; an empty line otherwise.
get_gc_opts () {
  # arithmetic result: 1 when java_version < 9, else 0
  local older_than_9=$(( $java_version < 9 ))

  if [[ "$older_than_9" == "1" ]]; then
    # don't need to worry about gc
    echo ""
  elif [[ "${JAVA_OPTS}" =~ Use.*GC ]] || [[ "${JAVA_TOOL_OPTIONS}" =~ Use.*GC ]] || [[ "${SBT_OPTS}" =~ Use.*GC ]] ; then
    # GC arg has been passed in - don't change
    echo ""
  else
    # Java 9+ so revert to old
    echo "-XX:+UseParallelGC"
  fi
}
|
||||
|
||||
# Validates the value of an option: $1 is the value kind shown in the message, $2 the
# option name, $3 the value. Exits with status 1 when the value is empty or starts with '-'.
require_arg () {
  local type="$1" opt="$2" arg="$3"
  case "$arg" in
    ""|-*)
      echo "$opt requires <$type> argument"
      exit 1
      ;;
  esac
}
|
||||
|
||||
# Succeeds when a shell function named $1 is defined; prints nothing.
is_function_defined() {
  declare -F "$1" >/dev/null
}
|
||||
|
||||
# parses JDK version from the -version output line.
# 8 for 1.8.0_nn, 9 for 9-ea etc, and "no_java" for undetected
# Runs "$java_cmd" -version and uses the first output line containing `version "`.
jdk_version() {
  local result
  local lines=$("$java_cmd" -Xms32M -Xmx32M -version 2>&1 | tr '\r' '\n')
  # split the captured output on newlines only
  local IFS=$'\n'
  for line in $lines; do
    # only the first matching line is used; later ones are skipped once result is set
    if [[ (-z $result) && ($line = *"version \""*) ]]
    then
      # the text between the quotes that follow "version"
      local ver=$(echo $line | sed -e 's/.*version "\(.*\)"\(.*\)/\1/; 1q')
      # on macOS sed doesn't support '?'
      if [[ $ver = "1."* ]]
      then
        # legacy "1.N..." scheme: keep N
        result=$(echo $ver | sed -e 's/1\.\([0-9]*\)\(.*\)/\1/; 1q')
      else
        # otherwise keep the leading digits
        result=$(echo $ver | sed -e 's/\([0-9]*\)\(.*\)/\1/; 1q')
      fi
    fi
  done
  if [[ -z $result ]]
  then
    result=no_java
  fi
  echo "$result"
}
|
||||
|
||||
# Parses the launcher command line.
# Options understood here set flags (verbose, debug, sbt_mem, sbt_jar, sbt_version,
# java_cmd and the JAVA_HOME/JDK_HOME/PATH exports) or are collected as java arguments /
# sbt commands. Everything else is collected as a residual argument and, when a
# process_my_args function is defined, handed to it for a second pass.
# Finally java_version is detected through jdk_version.
process_args () {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|-help) usage; exit 1 ;;
      -v|-verbose) verbose=1 && shift ;;
      -d|-debug) debug=1 && addSbt "-debug" && shift ;;

      -ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;;
      -mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;;
      -jvm-debug) require_arg port "$1" "$2" && addDebugger "$2" && shift 2 ;;
      -batch) exec </dev/null && shift ;;

      -sbt-jar) require_arg path "$1" "$2" && sbt_jar="$2" && shift 2 ;;
      -sbt-version) require_arg version "$1" "$2" && sbt_version="$2" && shift 2 ;;
      -java-home) require_arg path "$1" "$2" &&
                  java_cmd="$2/bin/java" &&
                  export JAVA_HOME="$2" &&
                  export JDK_HOME="$2" &&
                  export PATH="$2/bin:$PATH" &&
                  shift 2 ;;

      # The pattern must be unquoted to act as a glob: the quoted form "-D*" only matched
      # the literal string -D*, so -Dkey=val never reached the java arguments.
      -D*) addJava "$1" && shift ;;
      # -J is stripped before the option is passed to java
      -J*) addJava "${1:2}" && shift ;;
      *) addResidual "$1" && shift ;;
    esac
  done

  # Give the script-specific parser a chance to consume what is left over.
  is_function_defined process_my_args && {
    myargs=("${residual_args[@]}")
    residual_args=()
    process_my_args "${myargs[@]}"
  }

  java_version="$(jdk_version)"
  vlog "[process_args] java_version = '$java_version'"
}
|
||||
|
||||
# Prints the preloaded directory derived from -Dsbt.preloaded or
# -Dsbt.global.base, scanning these sources in order:
#   1. the _JAVA_OPTIONS environment variable
#   2. the SBT_OPTS environment variable
#   3. the JAVA_OPTS environment variable
#   4. properties collected from the command line (java_args)
# Later occurrences override earlier ones. An explicit sbt.preloaded always
# beats sbt.global.base; with neither present the result is $HOME/.sbt/preloaded.
getPreloaded() {
  local -a from_java_options from_sbt_opts from_java_opts
  read -a from_java_options <<< "$_JAVA_OPTIONS"
  read -a from_sbt_opts <<< "$SBT_OPTS"
  read -a from_java_opts <<< "$JAVA_OPTS"

  local explicit_dir=""
  local global_base_dir="$HOME/.sbt/preloaded"

  for opt in \
    "${from_java_options[@]}" \
    "${from_sbt_opts[@]}" \
    "${from_java_opts[@]}" \
    "${java_args[@]}"
  do
    case "$opt" in
      -Dsbt.preloaded=*)   explicit_dir="${opt#-Dsbt.preloaded=}" ;;
      -Dsbt.global.base=*) global_base_dir="${opt#-Dsbt.global.base=}/preloaded" ;;
    esac
  done

  if [[ -n "$explicit_dir" ]]; then
    echo "$explicit_dir"
  else
    echo "$global_base_dir"
  fi
}
|
||||
|
||||
# Copies the bundled local-preloaded repository into the user's preloaded
# directory (as reported by getPreloaded) unless the sbt jar for
# $init_sbt_version is already there. The bundled directory is optional and
# the copy is skipped when rsync is not available.
# Sets the global init_sbt_version when it was empty.
syncPreloaded() {
  local source_preloaded="$sbt_home/lib/local-preloaded/"
  local target_preloaded="$(getPreloaded)"

  if [[ -z "$init_sbt_version" ]]; then
    # FIXME: better $init_sbt_version detection
    init_sbt_version="$(ls -1 "$source_preloaded/org.scala-sbt/sbt/")"
  fi

  # Already synced: nothing to do.
  if [[ -f "$target_preloaded/org.scala-sbt/sbt/$init_sbt_version/jars/sbt.jar" ]]; then
    return 0
  fi

  # lib/local-preloaded exists (This is optional)
  [[ -d "$source_preloaded" ]] || return 1
  command -v rsync >/dev/null 2>&1 || return 1

  mkdir -p "$target_preloaded"
  rsync -a --ignore-existing "$source_preloaded" "$target_preloaded"
}
|
||||
|
||||
# Detect that we have java installed and that it is recent enough.
#   $1 - minimum required major version
# Reads the global $java_version. Exits with status 1 after printing a message
# when no JDK was detected or the detected version is below the requirement.
checkJava() {
  local required_version="$1"

  # The version is either a plain number or a marker such as "no_java"; anything
  # non-numeric (including an empty value) means Java was not detected. Comparing
  # such a marker as a string would wrongly count as "new enough".
  if [[ ! "$java_version" =~ ^[0-9]+$ ]]; then
    echo
    echo "No Java Development Kit (JDK) installation was detected."
    echo Please go to http://www.oracle.com/technetwork/java/javase/downloads/ and download.
    echo
    exit 1
  elif [[ "$java_version" -lt "$required_version" ]]; then
    echo
    echo "The Java Development Kit (JDK) installation you have is not up to date."
    echo "$script_name requires at least version $required_version+, you have"
    echo "version $java_version"
    echo
    echo Please go to http://www.oracle.com/technetwork/java/javase/downloads/ and download
    echo "a valid JDK and install before running $script_name."
    echo
    exit 1
  fi
}
|
||||
|
||||
# Java 9+ support: asks the rt-export jar for the extension directory, creates
# the rt.jar inside it when missing, and adds -Dscala.ext.dirs for the JVM.
# Does nothing for Java versions below 9 or when no numeric version is known.
# Sets the globals rtexport, java9_ext and java9_rt.
copyRt() {
  # A non-numeric version (e.g. the "no_java" marker) must not be compared.
  [[ "$java_version" =~ ^[0-9]+$ ]] || return 0

  if [[ "$java_version" -ge 9 ]]; then
    rtexport=$(rt_export_file)
    # The grep for java9-rt-ext- matches the filename prefix printed in Export.java
    # JAVA_OPTS / SBT_OPTS are deliberately unquoted so they split into words;
    # java_args is an array and is quoted so each element stays one argument.
    java9_ext=$("$java_cmd" ${JAVA_OPTS} ${SBT_OPTS:-$default_sbt_opts} "${java_args[@]}" \
      -jar "$rtexport" --rt-ext-dir | grep java9-rt-ext-)
    java9_rt="$java9_ext/rt.jar"
    vlog "[copyRt] java9_rt = '$java9_rt'"
    if [[ ! -f "$java9_rt" ]]; then
      echo Copying runtime jar.
      mkdir -p "$java9_ext"
      execRunner "$java_cmd" \
        ${JAVA_OPTS} \
        ${SBT_OPTS:-$default_sbt_opts} \
        "${java_args[@]}" \
        -jar "$rtexport" \
        "${java9_rt}"
    fi
    addJava "-Dscala.ext.dirs=${java9_ext}"
  fi
}
|
||||
|
||||
# Launcher entry point: parses arguments, syncs the preloaded repository,
# makes sure the sbt launch jar is present, checks the Java version, prepares
# Java 9+ support and finally runs sbt. Exits with the status left by execRunner.
run() {
  # process the combined args, then reset "$@" to the residuals
  process_args "$@"
  set -- "${residual_args[@]}"
  argumentCount=$#

  # Copy preloaded repo to user's preloaded directory
  syncPreloaded

  # no jar? download it.
  [[ -f "$sbt_jar" ]] || acquire_sbt_jar "$sbt_version" || {
    # still no jar? uh-oh.
    echo "Download failed. Obtain the sbt-launch.jar manually and place it at $sbt_jar"
    exit 1
  }

  # TODO - java check should be configurable...
  checkJava "6"

  # Java 9 support
  copyRt

  # If we're in cygwin, we should use the windows config, and terminal hacks
  if [[ "$CYGWIN_FLAG" == "true" ]]; then
    stty -icanon min 1 -echo > /dev/null 2>&1
    addJava "-Djline.terminal=jline.UnixTerminal"
    addJava "-Dsbt.cygwin=true"
  fi

  # run sbt
  # The option strings are left unquoted on purpose so they split into words;
  # java_args is an array and is quoted so arguments containing spaces survive.
  execRunner "$java_cmd" \
    $(get_mem_opts $sbt_mem) \
    $(get_gc_opts) \
    ${JAVA_OPTS} \
    ${SBT_OPTS:-$default_sbt_opts} \
    "${java_args[@]}" \
    -jar "$sbt_jar" \
    "${sbt_commands[@]}" \
    "${residual_args[@]}"

  exit_code=$?

  # Clean up the terminal from cygwin hacks.
  if [[ "$CYGWIN_FLAG" == "true" ]]; then
    stty icanon echo > /dev/null 2>&1
  fi
  exit $exit_code
}
|
||||
BIN
scala-stream/sbt-dist/bin/sbt-launch.jar
Normal file
BIN
scala-stream/sbt-dist/bin/sbt-launch.jar
Normal file
Binary file not shown.
212
scala-stream/sbt-dist/bin/sbt.bat
Normal file
212
scala-stream/sbt-dist/bin/sbt.bat
Normal file
@ -0,0 +1,212 @@
|
||||
@REM SBT launcher script
@REM
@REM Environment:
@REM JAVA_HOME - location of a JDK home dir (mandatory)
@REM SBT_OPTS  - JVM options (optional)
@REM Configuration:
@REM sbtconfig.txt found in the SBT_HOME.

@REM Delayed expansion is needed to accumulate CFG_OPTS inside the loop below.
@setlocal enabledelayedexpansion

@echo off
set SBT_HOME=%~dp0
set SBT_ARGS=

rem Step 1: read extra options from the config file, ignoring lines starting with #.
set FN=%SBT_HOME%\..\conf\sbtconfig.txt
set CFG_OPTS=
for /F "tokens=* eol=# usebackq delims=" %%i in ("%FN%") do (
  set DO_NOT_REUSE_ME=%%i
  rem The exclamation-mark form reads CFG_OPTS on every iteration; the percent
  rem form would stay empty for the whole loop.
  set CFG_OPTS=!CFG_OPTS! !DO_NOT_REUSE_ME!
)

rem poor man's jenv (which is not available on Windows):
rem pick the JDK named in .java-version from under JAVA_HOMES.
if defined JAVA_HOMES (
  if exist .java-version for /F %%A in (.java-version) do (
    set JAVA_HOME=%JAVA_HOMES%\%%A
    set JDK_HOME=%JAVA_HOMES%\%%A
  )
)
rem must set PATH or wrong javac is used for java projects
if defined JAVA_HOME set "PATH=%JAVA_HOME%\bin;%PATH%"

rem users can set JAVA_OPTS via .jvmopts (sbt-extras style); lines starting with # are skipped
if exist .jvmopts for /F %%A in (.jvmopts) do (
  set _jvmopts_line=%%A
  if not "!_jvmopts_line:~0,1!"=="#" (
    set JAVA_OPTS=%%A !JAVA_OPTS!
  )
)

rem Java executable: JAVACMD if set, else java.exe under JAVA_HOME, else plain "java".
set _JAVACMD=%JAVACMD%

if "%_JAVACMD%"=="" (
  if not "%JAVA_HOME%"=="" (
    if exist "%JAVA_HOME%\bin\java.exe" set "_JAVACMD=%JAVA_HOME%\bin\java.exe"
  )
)

if "%_JAVACMD%"=="" set _JAVACMD=java

rem JAVA_OPTS from the environment wins over the options read from the config file.
set _JAVA_OPTS=%JAVA_OPTS%
if "%_JAVA_OPTS%"=="" set _JAVA_OPTS=%CFG_OPTS%

set INIT_SBT_VERSION=1.2.8
|
||||
|
||||
rem Walks the command-line arguments and builds SBT_ARGS.
rem   -jvm-debug [port]  enables the debugger; the port defaults to 5005 and is
rem                      only taken from the argument that directly follows.
rem   new                recorded in sbt_new and still forwarded to sbt.
rem Every other argument is appended to SBT_ARGS unchanged.
:args_loop
if "%~1" == "" goto args_end

if "%~1" == "-jvm-debug" (
  set JVM_DEBUG=true
  set /a JVM_DEBUG_PORT=5005 2>nul >nul
  goto args_next
)

if "!JVM_DEBUG!" == "true" (
  rem Clear the flag so later arguments are never mistaken for a port. Evaluating
  rem a word such as compile with set /a would yield 0 and overwrite the port.
  set JVM_DEBUG=
  set _port_candidate=
  set /a _port_candidate=%1 2>nul >nul
  if "%~1" == "!_port_candidate!" (
    set JVM_DEBUG_PORT=!_port_candidate!
    goto args_next
  )
)

if /I "%~1" == "new" set sbt_new=true
set SBT_ARGS=!SBT_ARGS! %1

:args_next
shift
goto args_loop
:args_end
|
||||
|
||||
rem Confirm a user's intent if the current directory does not look like an sbt
rem top-level directory and the "new" command was not given.
rem The checks are flat so that no label sits inside a parenthesised block.
if exist build.sbt goto confirm_skip
if exist project\ goto confirm_skip
if defined sbt_new goto confirm_skip

echo [warn] Neither build.sbt nor a 'project' directory in the current directory: %CD%
rem The nested setlocal keeps the reply variable out of the script environment.
setlocal
:confirm
echo c^) continue
echo q^) quit

rem Quoted assignment: the prompt needs no trailing caret or trailing space.
set /P "reply=? "
if /I "!reply!" == "c" goto confirm_end
if /I "!reply!" == "q" exit /B 1

goto confirm
:confirm_end
endlocal
:confirm_skip
|
||||
|
||||
rem Main sequence: detect the Java version, check it, prepare Java 9+ support.
call :process
call :checkjava
call :copyrt

rem Add the JDWP agent options when a debug port was selected.
if defined JVM_DEBUG_PORT (
  set _JAVA_OPTS=!_JAVA_OPTS! -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=!JVM_DEBUG_PORT!
)

call :sync_preloaded

call :run %SBT_ARGS%

rem Errorlevel 1 or higher from the launcher ends the script through :error.
if not errorlevel 1 goto end
goto error

rem Starts sbt through the launcher jar; all arguments are forwarded.
:run

"%_JAVACMD%" %_JAVA_OPTS% %SBT_OPTS% -cp "%SBT_HOME%sbt-launch.jar" xsbt.boot.Boot %*
exit /B
|
||||
|
||||
:process
rem Stores the Java major version in JAVA_VERSION (0 when nothing was parsed).
rem Parses x out of 1.x; for example 8 out of java version 1.8.0_xx
rem Otherwise, parses the major version; 9 out of java version 9-ea
set JAVA_VERSION=0
rem The third token of the line containing "version" is the quoted version string.
for /f "tokens=3" %%g in ('"%_JAVACMD%" -Xms32M -Xmx32M -version 2^>^&1 ^| findstr /i version') do set JAVA_VERSION=%%g
rem Drop the surrounding double quotes.
set JAVA_VERSION=%JAVA_VERSION:"=%
for /f "delims=.-_ tokens=1-2" %%v in ("%JAVA_VERSION%") do (
  if "%%v" == "1" (set JAVA_VERSION=%%w) else (set JAVA_VERSION=%%v)
)
exit /B 0
|
||||
|
||||
:checkjava
rem Returns 0 when JAVA_VERSION is at least the required version; otherwise
rem prints an explanation and returns 1.
set required_version=6
if not %JAVA_VERSION% LSS %required_version% exit /B 0
echo.
echo The Java Development Kit (JDK) installation you have is not up to date.
echo sbt requires at least version %required_version%+, you have
echo version %JAVA_VERSION%
echo.
echo Please go to http://www.oracle.com/technetwork/java/javase/downloads/ and download
echo a valid JDK and install before running sbt.
echo.
exit /B 1
|
||||
|
||||
:copyrt
rem Java 9+ only: asks java9-rt-export.jar for the extension directory, creates
rem rt.jar there when it is missing and appends -Dscala.ext.dirs to _JAVA_OPTS.
rem A parallel GC flag is appended when the options name no collector.
if %JAVA_VERSION% GEQ 9 (
  set rtexport=!SBT_HOME!java9-rt-export.jar

  rem The directory is read back from a temp file holding the command output.
  "%_JAVACMD%" %_JAVA_OPTS% %SBT_OPTS% -jar "!rtexport!" --rt-ext-dir > "%TEMP%.\rtext.txt"
  set /p java9_ext= < "%TEMP%.\rtext.txt"
  set java9_rt=!java9_ext!\rt.jar

  if not exist "!java9_rt!" (
    mkdir "!java9_ext!"
    "%_JAVACMD%" %_JAVA_OPTS% %SBT_OPTS% -jar "!rtexport!" "!java9_rt!"
  )
  set _JAVA_OPTS=!_JAVA_OPTS! -Dscala.ext.dirs="!java9_ext!"

  rem check to see if a GC has been set in the opts
  echo !_JAVA_OPTS! | findstr /r "Use.*GC" >nul
  if errorlevel 1 (
    rem don't have a GC set - revert to old GC
    set _JAVA_OPTS=!_JAVA_OPTS! -XX:+UseParallelGC
  )
)
exit /B 0
|
||||
|
||||
:sync_preloaded
rem Copies the bundled local-preloaded repository into the user's
rem .sbt\preloaded directory when the sbt jar for INIT_SBT_VERSION is not there
rem yet. Only runs on Java 8+ and when robocopy is available.
if "%INIT_SBT_VERSION%"=="" (
  rem FIXME: better INIT_SBT_VERSION detection
  for /F "tokens=* usebackq" %%F in (`dir /b "%SBT_HOME%\..\lib\local-preloaded\org.scala-sbt\sbt"`) do (
    set INIT_SBT_VERSION=%%F
  )
)
set PRELOAD_SBT_JAR="%UserProfile%\.sbt\preloaded\org.scala-sbt\sbt\%INIT_SBT_VERSION%\jars\sbt.jar"
if /I %JAVA_VERSION% GEQ 8 (
  where robocopy >nul 2>nul
  rem The errorlevel test below is evaluated when this line runs. A percent-style
  rem reference would be substituted while the whole block is parsed, that is
  rem before the where command has run.
  if not errorlevel 1 (
    if not exist %PRELOAD_SBT_JAR% (
      if exist "%SBT_HOME%\..\lib\local-preloaded\" (
        echo "about to robocopy"
        robocopy "%SBT_HOME%\..\lib\local-preloaded" "%UserProfile%\.sbt\preloaded" /E
      )
    )
  )
)
exit /B 0
|
||||
|
||||
rem Failure exit: leave the script's setlocal scope and return 1.
:error
@endlocal
exit /b 1

rem Normal exit: leave the script's setlocal scope and return 0.
:end
@endlocal
exit /b 0
|
||||
14
scala-stream/sbt-dist/conf/sbtconfig.txt
Normal file
14
scala-stream/sbt-dist/conf/sbtconfig.txt
Normal file
@ -0,0 +1,14 @@
|
||||
# Raise the JVM memory limits
|
||||
|
||||
-Xmx512M
|
||||
|
||||
-XX:MaxPermSize=256m
|
||||
|
||||
-XX:ReservedCodeCacheSize=128m
|
||||
|
||||
|
||||
|
||||
# Set the extra SBT options
|
||||
|
||||
-Dsbt.log.format=true
|
||||
|
||||
49
scala-stream/sbt-dist/conf/sbtopts
Normal file
49
scala-stream/sbt-dist/conf/sbtopts
Normal file
@ -0,0 +1,49 @@
|
||||
# ------------------------------------------------ #
|
||||
# The SBT Configuration file. #
|
||||
# ------------------------------------------------ #
|
||||
|
||||
|
||||
# Disable ANSI color codes
|
||||
#
|
||||
#-no-colors
|
||||
|
||||
# Starts sbt even if the current directory contains no sbt project.
|
||||
#
|
||||
-sbt-create
|
||||
|
||||
# Path to global settings/plugins directory (default: ~/.sbt)
|
||||
#
|
||||
#-sbt-dir /etc/sbt
|
||||
|
||||
# Path to shared boot directory (default: ~/.sbt/boot in 0.11 series)
|
||||
#
|
||||
#-sbt-boot ~/.sbt/boot
|
||||
|
||||
# Path to local Ivy repository (default: ~/.ivy2)
|
||||
#
|
||||
#-ivy ~/.ivy2
|
||||
|
||||
# set memory options
|
||||
#
|
||||
#-mem <integer>
|
||||
|
||||
# Use local caches for projects, no sharing.
|
||||
#
|
||||
#-no-share
|
||||
|
||||
# Put SBT in offline mode.
|
||||
#
|
||||
#-offline
|
||||
|
||||
# Sets the SBT version to use.
|
||||
#-sbt-version 0.11.3
|
||||
|
||||
# Scala version (default: latest release)
|
||||
#
|
||||
#-scala-home <path>
|
||||
#-scala-version <version>
|
||||
|
||||
# java version (default: java from PATH, currently $(java -version |& grep version))
|
||||
#
|
||||
#-java-home <path>
|
||||
|
||||
44
scala-stream/src/main/scala/scalastream/J8SStream.scala
Normal file
44
scala-stream/src/main/scala/scalastream/J8SStream.scala
Normal file
@ -0,0 +1,44 @@
|
||||
package scalastream
|
||||
|
||||
import scalastream.App.{Config, Data}
|
||||
|
||||
import scala.collection.immutable.ArraySeq
|
||||
import scala.reflect.{ClassTag, classTag}
|
||||
|
||||
/** BabelStream kernels driven by a parallel `java.util.stream.IntStream` over the index
  * range `[0, arraysize)`.
  *
  * The working arrays are allocated and filled by [[initArrays]]; each kernel builds a
  * fresh parallel stream and performs one element-wise update per index.
  *
  * @param config benchmark configuration supplying array size, scalar and initial values
  */
class J8SStream[@specialized(Float, Double) A: Fractional: ClassTag](val config: Config[A])
    extends ScalaStream[A]:

  private var a: Array[A] = _
  private var b: Array[A] = _
  private var c: Array[A] = _
  private val scalar: A   = config.scalar

  /** A new parallel stream of all valid array indices. */
  inline private def stream =
    java.util.stream.IntStream.range(0, config.options.arraysize).parallel()

  override inline def initArrays(): Unit =
    a = Array.ofDim(config.options.arraysize)
    b = Array.ofDim(config.options.arraysize)
    c = Array.ofDim(config.options.arraysize)
    stream.forEach { i =>
      a(i) = config.init._1
      b(i) = config.init._2
      c(i) = config.init._3
    }

  override inline def copy(): Unit  = stream.forEach(i => c(i) = a(i))
  override inline def mul(): Unit   = stream.forEach(i => b(i) = scalar * c(i))
  override inline def add(): Unit   = stream.forEach(i => c(i) = a(i) + b(i))
  override inline def triad(): Unit = stream.forEach(i => a(i) = b(i) + scalar * c(i))

  // Accumulating update `a += b + scalar * c`, grouped the same way as the reference
  // computation in App.validate.
  override inline def nstream(): Unit =
    stream.forEach(i => a(i) = a(i) + (b(i) + scalar * c(i)))

  override inline def dot(): A =
    // horrible special-case for double, there isn't a mapToFloat so we give up on that
    val cls = classTag[A].runtimeClass
    if java.lang.Double.TYPE == cls then
      stream
        .mapToDouble(i => (a(i) * b(i)).asInstanceOf[Double])
        .reduce(0, (l: Double, r: Double) => l + r)
        .asInstanceOf[A]
    else stream.mapToObj[A](i => a(i) * b(i)).reduce(0.fractional, (l: A, r: A) => l + r)

  /** Immutable copies of the three working arrays. */
  override inline def data(): Data[A] = Data(a.to(ArraySeq), b.to(ArraySeq), c.to(ArraySeq))
|
||||
36
scala-stream/src/main/scala/scalastream/ParStream.scala
Normal file
36
scala-stream/src/main/scala/scalastream/ParStream.scala
Normal file
@ -0,0 +1,36 @@
|
||||
package scalastream
|
||||
|
||||
import scalastream.App.{Config, Data}
|
||||
|
||||
import scala.collection.immutable.ArraySeq
|
||||
import scala.collection.parallel.CollectionConverters._
|
||||
import scala.reflect.ClassTag
|
||||
/** BabelStream kernels that iterate a parallel index range (`.par`) from Scala's parallel
  * collections.
  *
  * @param config benchmark configuration supplying array size, scalar and initial values
  */
class ParStream[@specialized(Float, Double) A: Fractional: ClassTag](val config: Config[A])
    extends ScalaStream[A]:

  private var a: Array[A] = _
  private var b: Array[A] = _
  private var c: Array[A] = _
  private val scalar: A   = config.scalar

  /** A new parallel range over all valid array indices. */
  inline private def indices = (0 until config.options.arraysize).par

  override inline def initArrays(): Unit =
    a = Array.ofDim(config.options.arraysize)
    b = Array.ofDim(config.options.arraysize)
    c = Array.ofDim(config.options.arraysize)

    for i <- indices do
      a(i) = config.init._1
      b(i) = config.init._2
      c(i) = config.init._3

  override inline def copy(): Unit  = for i <- indices do c(i) = a(i)
  override inline def mul(): Unit   = for i <- indices do b(i) = scalar * c(i)
  override inline def add(): Unit   = for i <- indices do c(i) = a(i) + b(i)
  override inline def triad(): Unit = for i <- indices do a(i) = b(i) + scalar * c(i)

  // Accumulating update `a += b + scalar * c`, grouped the same way as the reference
  // computation in App.validate.
  override inline def nstream(): Unit =
    for i <- indices do a(i) = a(i) + (b(i) + scalar * c(i))

  override inline def dot(): A =
    indices.aggregate[A](0.fractional)((acc, i) => acc + (a(i) * b(i)), _ + _)

  /** Immutable copies of the three working arrays. */
  override inline def data(): Data[A] = Data(a.to(ArraySeq), b.to(ArraySeq), c.to(ArraySeq))
|
||||
31
scala-stream/src/main/scala/scalastream/PlainStream.scala
Normal file
31
scala-stream/src/main/scala/scalastream/PlainStream.scala
Normal file
@ -0,0 +1,31 @@
|
||||
package scalastream
|
||||
|
||||
import scalastream.App.{Config, Data}
|
||||
|
||||
import scala.collection.immutable.ArraySeq
|
||||
import scala.reflect.ClassTag
|
||||
/** Single-threaded BabelStream kernels written as plain loops over the index range.
  *
  * @param config benchmark configuration supplying array size, scalar and initial values
  */
class PlainStream[@specialized(Float, Double) A: Fractional: ClassTag](val config: Config[A])
    extends ScalaStream[A]:

  private var a: Array[A] = _
  private var b: Array[A] = _
  private var c: Array[A] = _
  private val scalar: A   = config.scalar

  override inline def initArrays(): Unit =
    a = Array.fill(config.options.arraysize)(config.init._1)
    b = Array.fill(config.options.arraysize)(config.init._2)
    c = Array.fill(config.options.arraysize)(config.init._3)

  private inline def indices = 0 until config.options.arraysize

  override inline def copy(): Unit  = for i <- indices do c(i) = a(i)
  override inline def mul(): Unit   = for i <- indices do b(i) = scalar * c(i)
  override inline def add(): Unit   = for i <- indices do c(i) = a(i) + b(i)
  override inline def triad(): Unit = for i <- indices do a(i) = b(i) + (scalar * c(i))

  // Accumulating update `a += b + scalar * c`, grouped the same way as the reference
  // computation in App.validate.
  override inline def nstream(): Unit =
    for i <- indices do a(i) = a(i) + (b(i) + scalar * c(i))

  override inline def dot(): A =
    var acc: A = 0.fractional
    for i <- indices do acc = acc + (a(i) * b(i))
    acc

  /** Immutable copies of the three working arrays. */
  override inline def data(): Data[A] = Data(a.to(ArraySeq), b.to(ArraySeq), c.to(ArraySeq))
|
||||
369
scala-stream/src/main/scala/scalastream/ScalaStream.scala
Normal file
369
scala-stream/src/main/scala/scalastream/ScalaStream.scala
Normal file
@ -0,0 +1,369 @@
|
||||
package scalastream
|
||||
import scalastream.App.{Config, Data, Timings}
|
||||
|
||||
import java.util.concurrent.TimeUnit
|
||||
import scala.collection.immutable.ArraySeq
|
||||
import scala.collection.mutable.ArrayBuffer
|
||||
import scala.concurrent.duration.{Duration, FiniteDuration, SECONDS}
|
||||
import scala.math.{Pi, pow}
|
||||
import scala.reflect.ClassTag
|
||||
import scopt.OParser
|
||||
|
||||
/** Contract shared by every stream implementation, plus timing helpers built on it.
  *
  * Implementations supply the kernels; the `run*` methods time them with
  * `System.nanoTime`.
  */
transparent trait ScalaStream[@specialized(Float, Double) A]:

  def config: Config[A]

  def initArrays(): Unit
  def copy(): Unit
  def mul(): Unit
  def add(): Unit
  def triad(): Unit
  def nstream(): Unit
  def dot(): A

  /** Evaluates `f` once and returns the elapsed wall-clock time together with its result. */
  transparent inline def timed[R](f: => R): (FiniteDuration, R) =
    val t0      = System.nanoTime()
    val outcome = f
    val t1      = System.nanoTime()
    (FiniteDuration(t1 - t0, TimeUnit.NANOSECONDS), outcome)

  /** Runs copy, mul, add, triad and dot in that order, `times` times over.
    *
    * @return the per-iteration durations of each kernel and the dot result of the last
    *         iteration (zero when `times` is not positive)
    */
  inline def runAll(times: Int)(using Fractional[A]): (Timings[Vector[FiniteDuration]], A) =
    var lastSum: A = 0.fractional

    // One tuple of durations per iteration; Vector.fill evaluates the body in order.
    val samples = Vector.fill(times) {
      val copyTime       = timed(this.copy())._1
      val mulTime        = timed(this.mul())._1
      val addTime        = timed(this.add())._1
      val triadTime      = timed(this.triad())._1
      val (dotTime, sum) = timed(this.dot())
      lastSum = sum
      (copyTime, mulTime, addTime, triadTime, dotTime)
    }

    val timings = Timings(
      copy = samples.map(_._1),
      mul = samples.map(_._2),
      add = samples.map(_._3),
      triad = samples.map(_._4),
      dot = samples.map(_._5)
    )
    (timings, lastSum)

  /** Total time taken by `times` consecutive triad calls. */
  def runTriad(times: Int): FiniteDuration =
    timed((0 until times).foreach(_ => triad()))._1

  /** Duration of each of `times` nstream calls. */
  def runNStream(times: Int): Vector[FiniteDuration] =
    Vector.fill(times) {
      val (elapsed, _) = timed(nstream())
      elapsed
    }

  def data(): Data[A]
|
||||
|
||||
|
||||
/** Arithmetic type class for the benchmark's element type.
  *
  * Instances supply conversion from `Float`/`Double`, comparison and the four basic
  * operations; the extension methods below expose them as `n.fractional` and as
  * infix operators on `A`.
  */
trait Fractional[@specialized(Double, Float) A]:
  def toFractional(f: Float): A
  def toFractional(f: Double): A
  def compare(x: A, y: A): Int
  def add(x: A, y: A): A
  def sub(x: A, y: A): A
  def mul(x: A, y: A): A
  def div(x: A, y: A): A
  def abs(x: A): A
  extension (x: Float) inline def fractional  = toFractional(x)
  extension (x: Double) inline def fractional = toFractional(x)
  // Int -> Double is exact, whereas Int -> Float rounds once |x| exceeds 2^24; going
  // through Double keeps large counts (e.g. array sizes) exact for Double instances.
  extension (x: Int) inline def fractional  = toFractional(x.toDouble)
  extension (x: Long) inline def fractional = toFractional(x.toDouble)
  extension (x: A)
    inline def +(y: A) = add(x, y)
    inline def -(y: A) = sub(x, y)
    inline def *(y: A) = mul(x, y)
    inline def /(y: A) = div(x, y)
    inline def >(y: A) = compare(x, y) > 0
    inline def <(y: A) = compare(x, y) < 0
    inline def abs_    = abs(x)
end Fractional
|
||||
|
||||
/** [[Fractional]] instance backed by primitive `Float` arithmetic. */
given FloatFractional: Fractional[Float] with
  inline def toFractional(f: Float): Float      = f
  inline def toFractional(f: Double): Float     = f.toFloat
  inline def compare(x: Float, y: Float): Int   = java.lang.Float.compare(x, y)
  inline def add(x: Float, y: Float): Float     = x + y
  inline def sub(x: Float, y: Float): Float     = x - y
  inline def mul(x: Float, y: Float): Float     = x * y
  inline def div(x: Float, y: Float): Float     = x / y
  inline def abs(x: Float): Float               = java.lang.Math.abs(x)
|
||||
|
||||
/** [[Fractional]] instance backed by primitive `Double` arithmetic. */
given DoubleFractional: Fractional[Double] with
  inline def toFractional(f: Float): Double      = f.toDouble
  inline def toFractional(f: Double): Double     = f
  inline def compare(x: Double, y: Double): Int  = java.lang.Double.compare(x, y)
  inline def add(x: Double, y: Double): Double   = x + y
  inline def sub(x: Double, y: Double): Double   = x - y
  inline def mul(x: Double, y: Double): Double   = x * y
  inline def div(x: Double, y: Double): Double   = x / y
  inline def abs(x: Double): Double              = java.lang.Math.abs(x)
|
||||
|
||||
object App:
|
||||
|
||||
final val Version: String = "3.4.0"
|
||||
|
||||
case class Config[@specialized(Double, Float) A](
|
||||
options: Options,
|
||||
benchmark: Benchmark,
|
||||
typeSize: Int,
|
||||
ulp: A,
|
||||
scalar: A,
|
||||
init: (A, A, A)
|
||||
)
|
||||
|
||||
case class Timings[A](copy: A, mul: A, add: A, triad: A, dot: A)
|
||||
case class Data[A](@specialized(Double, Float) a: ArraySeq[A], b: ArraySeq[A], c: ArraySeq[A])
|
||||
|
||||
case class Options(
|
||||
list: Boolean = false,
|
||||
device: Int = 0,
|
||||
numtimes: Int = 100,
|
||||
arraysize: Int = 33554432,
|
||||
float: Boolean = false,
|
||||
triad_only: Boolean = false,
|
||||
nstream_only: Boolean = false,
|
||||
csv: Boolean = false,
|
||||
mibibytes: Boolean = false
|
||||
)
|
||||
|
||||
object Options:
|
||||
val Default = Options()
|
||||
val builder = OParser.builder[Options]
|
||||
val parser1 =
|
||||
import builder._
|
||||
OParser.sequence(
|
||||
programName("scala-stream"),
|
||||
head("ScalaStream", s"$Version"),
|
||||
opt[Unit]('l', "list").text("List available devices").action((_, x) => x.copy(list = true)),
|
||||
opt[Int]('d', "device")
|
||||
.text(s"Select device at <device>, defaults to ${Default.device}")
|
||||
.action((v, x) => x.copy(device = v)),
|
||||
opt[Int]('n', "numtimes")
|
||||
.text(s"Run the test <numtimes> times (NUM >= 2), defaults to ${Default.numtimes}")
|
||||
.validate {
|
||||
case n if n >= 2 => success
|
||||
case n => failure(s"$n <= 2")
|
||||
}
|
||||
.action((n, x) => x.copy(numtimes = n)),
|
||||
opt[Int]('a', "arraysize")
|
||||
.text(s"Use <arraysize> elements in the array, defaults to ${Default.arraysize}")
|
||||
.action((v, x) => x.copy(arraysize = v)),
|
||||
opt[Unit]('f', "float")
|
||||
.text("Use floats (rather than doubles)")
|
||||
.action((_, x) => x.copy(float = true)),
|
||||
opt[Unit]('t', "triad_only")
|
||||
.text("Only run triad")
|
||||
.action((_, x) => x.copy(triad_only = true)),
|
||||
opt[Unit]('n', "nstream_only")
|
||||
.text("Only run nstream")
|
||||
.action((_, x) => x.copy(nstream_only = true)),
|
||||
opt[Unit]('c', "csv").text("Output as csv table").action((_, x) => x.copy(csv = true)),
|
||||
opt[Unit]('m', "mibibytes")
|
||||
.text("Use MiB=2^20 for bandwidth calculation (default MB=10^6)")
|
||||
.action((_, x) => x.copy(mibibytes = true)),
|
||||
help('h', "help").text("prints this usage text")
|
||||
)
|
||||
|
||||
enum Benchmark:
|
||||
case All, NStream, Triad
|
||||
|
||||
implicit class RichDuration(private val d: Duration) extends AnyVal:
|
||||
def seconds: Double = d.toUnit(SECONDS)
|
||||
|
||||
def validate[A: Fractional](vec: Data[A], config: Config[A], dotSum: Option[A] = None): Unit =
|
||||
|
||||
var (goldA, goldB, goldC) = config.init
|
||||
for _ <- 0 until config.options.numtimes do
|
||||
config.benchmark match
|
||||
case Benchmark.All =>
|
||||
goldC = goldA
|
||||
goldB = config.scalar * goldC
|
||||
goldC = goldA + goldB
|
||||
goldA = goldB + config.scalar * goldC
|
||||
case Benchmark.Triad =>
|
||||
goldA = goldB + config.scalar * goldC
|
||||
case Benchmark.NStream =>
|
||||
goldA += goldB + config.scalar * goldC
|
||||
|
||||
val tolerance = config.ulp * (100.fractional)
|
||||
def validateXs(name: String, xs: Seq[A], from: A): Unit =
|
||||
val error = xs.map(x => (x - from).abs_).fold(0.fractional)(_ + _) / xs.size.fractional
|
||||
if error > tolerance then
|
||||
Console.err.println(s"Validation failed on $name. Average error $error ")
|
||||
|
||||
validateXs("a", vec.a, goldA)
|
||||
validateXs("b", vec.b, goldB)
|
||||
validateXs("c", vec.c, goldC)
|
||||
|
||||
dotSum.foreach { sum =>
|
||||
val goldSum = (goldA * goldB) * (config.options.arraysize).fractional
|
||||
val error = ((sum - goldSum) / goldSum).abs_
|
||||
if error > 1.fractional / 100000000.fractional then
|
||||
Console.err.println(
|
||||
s"Validation failed on sum. Error $error \nSum was $sum but should be $goldSum"
|
||||
)
|
||||
}
|
||||
|
||||
inline def run[A: Fractional: ClassTag](
|
||||
name: String,
|
||||
config: Config[A],
|
||||
mkStream: Config[A] => ScalaStream[A]
|
||||
): Unit =
|
||||
|
||||
val opt = config.options
|
||||
|
||||
val arrayBytes = opt.arraysize * config.typeSize
|
||||
val totalBytes = arrayBytes * 3
|
||||
val (megaScale, megaSuffix, gigaScale, gigaSuffix) =
|
||||
if !opt.mibibytes then (1.0e-6, "MB", 1.0e-9, "GB")
|
||||
else (pow(2.0, -20), "MiB", pow(2.0, -30), "GiB")
|
||||
|
||||
if !opt.csv then
|
||||
|
||||
val vendor = System.getProperty("java.vendor")
|
||||
val ver = System.getProperty("java.version")
|
||||
val home = System.getProperty("java.home")
|
||||
println(
|
||||
s"""BabelStream
|
||||
|Version: $Version
|
||||
|Implementation: $name; Scala (Java $ver; $vendor; home=$home)""".stripMargin
|
||||
)
|
||||
|
||||
println(s"Running ${config.benchmark match {
|
||||
case Benchmark.All => "kernels"
|
||||
case Benchmark.Triad => "triad"
|
||||
case Benchmark.NStream => "nstream"
|
||||
}} ${opt.numtimes} times")
|
||||
|
||||
if config.benchmark == Benchmark.Triad then println(s"Number of elements: ${opt.arraysize}")
|
||||
|
||||
println(s"Precision: ${if opt.float then "float" else "double"}")
|
||||
println(
|
||||
f"Array size: ${megaScale * arrayBytes}%.1f $megaSuffix (=${gigaScale * arrayBytes}%.1f $gigaSuffix)"
|
||||
)
|
||||
println(
|
||||
f"Total size: ${megaScale * totalBytes}%.1f $megaSuffix (=${gigaScale * totalBytes}%.1f $gigaSuffix)"
|
||||
)
|
||||
|
||||
def mkRow(xs: Vector[FiniteDuration], name: String, totalBytes: Int) =
|
||||
val tail = xs.tail
|
||||
(tail.minOption.map(_.seconds), tail.maxOption.map(_.seconds)) match
|
||||
case (Some(min), Some(max)) =>
|
||||
val avg = (tail.foldLeft(Duration.Zero)(_ + _) / tail.size.toDouble).seconds
|
||||
val mbps = megaScale * totalBytes.toDouble / min
|
||||
if opt.csv then
|
||||
Vector(
|
||||
"function" -> name,
|
||||
"num_times" -> opt.numtimes.toString,
|
||||
"n_elements" -> opt.arraysize.toString,
|
||||
"sizeof" -> totalBytes.toString,
|
||||
s"max_m${if opt.mibibytes then "i" else ""}bytes_per_sec" -> mbps.toString,
|
||||
"min_runtime" -> min.toString,
|
||||
"max_runtime" -> max.toString,
|
||||
"avg_runtime" -> avg.toString
|
||||
)
|
||||
else
|
||||
Vector(
|
||||
"Function" -> name,
|
||||
s"M${if opt.mibibytes then "i" else ""}Bytes/sec" -> f"$mbps%.3f",
|
||||
"Min (sec)" -> f"$min%.5f",
|
||||
"Max" -> f"$max%.5f",
|
||||
"Average" -> f"$avg%.5f"
|
||||
)
|
||||
case (_, _) => sys.error(s"No min/max element for $name(size=$totalBytes)")
|
||||
|
||||
def tabulate(rows: Vector[(String, String)]*): Unit = rows.toList match
|
||||
case Nil => sys.error(s"Empty tabulation")
|
||||
case header :: _ =>
|
||||
val padding = if opt.csv then 0 else 12
|
||||
val sep = if opt.csv then "," else ""
|
||||
println(header.map(_._1.padTo(padding, ' ')).mkString(sep))
|
||||
println(rows.map(_.map(_._2.padTo(padding, ' ')).mkString(sep)).mkString("\n"))
|
||||
|
||||
val stream = mkStream(config)
|
||||
stream.initArrays()
|
||||
config.benchmark match
|
||||
case Benchmark.All =>
|
||||
val (results, sum) = stream.runAll(opt.numtimes)
|
||||
validate(stream.data(), config, Some(sum))
|
||||
tabulate(
|
||||
mkRow(results.copy, "Copy", 2 * arrayBytes),
|
||||
mkRow(results.mul, "Mul", 2 * arrayBytes),
|
||||
mkRow(results.add, "Add", 3 * arrayBytes),
|
||||
mkRow(results.triad, "Triad", 3 * arrayBytes),
|
||||
mkRow(results.dot, "Dot", 2 * arrayBytes)
|
||||
)
|
||||
case Benchmark.NStream =>
|
||||
val result = stream.runNStream(opt.numtimes)
|
||||
validate(stream.data(), config)
|
||||
tabulate(mkRow(result, "Nstream", 4 * arrayBytes))
|
||||
case Benchmark.Triad =>
|
||||
val results = stream.runTriad(opt.numtimes)
|
||||
val totalBytes = 3 * arrayBytes * opt.numtimes
|
||||
val bandwidth = megaScale * (totalBytes / results.seconds)
|
||||
println(f"Runtime (seconds): ${results.seconds}%.5f")
|
||||
println(f"Bandwidth ($gigaSuffix/s): $bandwidth%.3f ")
|
||||
|
||||
inline def devices[A: Fractional: ClassTag]: Vector[(String, Config[A] => ScalaStream[A])] =
|
||||
Vector(
|
||||
"Scala Parallel Collections" -> (ParStream(_)),
|
||||
"Java 8 Stream" -> (J8SStream(_)),
|
||||
"Threaded" -> (ThreadStream(_)),
|
||||
"Serial" -> (PlainStream(_))
|
||||
)
|
||||
|
||||
inline def runWith[A: Fractional: ClassTag](i: Int, config: Config[A]): Unit =
|
||||
devices[A].lift(i) match
|
||||
case None => println(s"Device index out of bounds: $i")
|
||||
case Some((name, mkStream)) => run(name, config, mkStream)
|
||||
|
||||
/** Program entry point: parses the command line and either lists the devices or runs a benchmark.
  *
  * Exits with status 1 when the arguments cannot be parsed.
  *
  * @param args raw command-line arguments
  */
def main(args: Array[String]): Unit =

  // Turns parsed options into a benchmark selection and dispatches to listing or running.
  def handleOpt(opt: Options) =
    // Resolved before anything else, so conflicting flags are rejected even when `opt.list` is set.
    val benchmark =
      if opt.nstream_only && opt.triad_only then
        throw new RuntimeException(
          "Both triad and nstream are enabled, pick one or omit both to run all benchmarks"
        )
      else if opt.nstream_only then Benchmark.NStream
      else if opt.triad_only then Benchmark.Triad
      else Benchmark.All

    if opt.list then
      devices[Float].map(_._1).zipWithIndex.foreach((name, i) => println(s"$i: $name"))
    else if opt.float then
      val singlePrecision = Config(
        options = opt,
        benchmark = benchmark,
        typeSize = 4, // 32bit
        ulp = math.ulp(Float.MaxValue),
        scalar = 0.4f,
        init = (0.1f, 0.2f, 0.0f)
      )
      runWith(opt.device, singlePrecision)
    else
      val doublePrecision = Config(
        options = opt,
        benchmark = benchmark,
        typeSize = 8, // 64bit
        ulp = math.ulp(Double.MaxValue),
        scalar = 0.4,
        init = (0.1, 0.2, 0.0)
      )
      runWith(opt.device, doublePrecision)

  OParser.parse(Options.parser1, args, Options.Default) match
    case None         => sys.exit(1)
    case Some(parsed) => handleOpt(parsed)
|
||||
68
scala-stream/src/main/scala/scalastream/ThreadStream.scala
Normal file
68
scala-stream/src/main/scala/scalastream/ThreadStream.scala
Normal file
@ -0,0 +1,68 @@
|
||||
package scalastream
|
||||
|
||||
import net.openhft.affinity.{AffinityStrategies, AffinityThreadFactory}
|
||||
import scalastream.App.{Config, Data}
|
||||
|
||||
import java.util.concurrent.{Callable, Executors}
|
||||
import scala.collection.immutable.ArraySeq
|
||||
import scala.reflect.ClassTag
|
||||
object ThreadStream {}

/** [[ScalaStream]] implementation that runs every kernel on a fixed-size thread pool.
  *
  * The index space `0 until arraysize` is split into contiguous ranges once, at construction
  * time. Each kernel submits one task per range to the pool and waits for all of them.
  *
  * @param config benchmark configuration; supplies the array size, the scalar and the initial
  *               values of the three arrays
  * @tparam A element type of the arrays
  */
class ThreadStream[@specialized(Float, Double) A: Fractional: ClassTag](val config: Config[A])
    extends ScalaStream[A]:

  // Allocated by initArrays(); unset until then.
  private var a: Array[A] = _
  private var b: Array[A] = _
  private var c: Array[A] = _
  private val scalar: A   = config.scalar

  // Number of pool threads; also the divisor used to derive the length of each index range.
  private val chunks: Int = sys.runtime.availableProcessors()

  private val pool = Executors.newFixedThreadPool(
    chunks,
    new AffinityThreadFactory("scala-stream", true, AffinityStrategies.DIFFERENT_CORE)
  )

  // Contiguous index ranges, one per task. The range length is clamped to at least 1:
  // when arraysize < chunks the integer division yields 0, and grouped(0) throws
  // IllegalArgumentException. For arraysize >= chunks the partitioning is unchanged.
  private val indices = (0 until config.options.arraysize)
    .grouped(math.max(1, config.options.arraysize / chunks))
    .toSeq

  /** Applies `f` to every index, using one pool task per index range, and waits for all tasks.
    *
    * @param c by-name context; evaluated once inside each task, so each task gets its own value
    * @param f body invoked with the task's context and each index of the task's range
    * @return the per-task contexts, in the order of `indices`
    */
  private inline def forEachAll[C](c: => C)(f: (C, Int) => Unit): Seq[C] =
    import scala.jdk.CollectionConverters._
    val xs = pool
      .invokeAll(
        indices.map { r =>
          { () =>
            val ctx = c
            r.foreach(f(ctx, _))
            ctx
          }: Callable[C]
        }.asJavaCollection
      )
      .asScala
      .map(_.get()) // surfaces any failure raised inside a task
      .toSeq
    xs

  /** Allocates the three arrays and fills them in parallel with the values from `config.init`. */
  override inline def initArrays(): Unit =
    a = Array.ofDim(config.options.arraysize)
    b = Array.ofDim(config.options.arraysize)
    c = Array.ofDim(config.options.arraysize)
    forEachAll(()) { (_, i) =>
      a(i) = config.init._1
      b(i) = config.init._2
      c(i) = config.init._3
    }
    ()

  /** Mutable single-value holder; `dot` uses one per task as its private accumulator. */
  class Box(var value: A)

  /** c(i) = a(i) */
  override inline def copy(): Unit = { forEachAll(())((_, i) => c(i) = a(i)); () }

  /** b(i) = scalar * c(i) */
  override inline def mul(): Unit = { forEachAll(())((_, i) => b(i) = scalar * c(i)); () }

  /** c(i) = a(i) + b(i) */
  override inline def add(): Unit = { forEachAll(())((_, i) => c(i) = a(i) + b(i)); () }

  /** a(i) = b(i) + scalar * c(i) */
  override inline def triad(): Unit = { forEachAll(())((_, i) => a(i) = b(i) + scalar * c(i)); () }

  /** a(i) = b(i) * scalar * c(i)
    *
    * NOTE(review): other BabelStream implementations define nstream as
    * `a(i) += b(i) + scalar * c(i)`; confirm this formula against the validation code before
    * changing it.
    */
  override inline def nstream(): Unit = { forEachAll(())((_, i) => a(i) = b(i) * scalar * c(i)); () }

  /** Dot product of `a` and `b`: each task sums its own range into a private [[Box]], then the
    * partial sums are added together.
    *
    * @return the sum of `a(i) * b(i)` over all indices
    */
  override inline def dot(): A =
    forEachAll(Box(0.fractional))((acc, i) => acc.value = acc.value + (a(i) * b(i)))
      .map(_.value)
      .fold(0.fractional)(_ + _)

  /** Returns the three arrays converted to `ArraySeq`s and wrapped in a `Data`. */
  override inline def data(): Data[A] = Data(a.to(ArraySeq), b.to(ArraySeq), c.to(ArraySeq))
|
||||
Loading…
Reference in New Issue
Block a user