diff --git a/JuliaStream.jl/AMDGPU/Manifest.toml b/JuliaStream.jl/AMDGPU/Manifest.toml index 5d1a8a7..6525501 100644 --- a/JuliaStream.jl/AMDGPU/Manifest.toml +++ b/JuliaStream.jl/AMDGPU/Manifest.toml @@ -109,15 +109,15 @@ version = "1.3.0" [[LLVM]] deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"] -git-tree-sha1 = "d6041ad706cf458b2c9f3e501152488a26451e9c" +git-tree-sha1 = "23a47d417a3cd9c2e73c854bac7dd4731c105ef7" uuid = "929cbde3-209d-540e-8aea-75f648917ca0" -version = "4.2.0" +version = "4.4.0" [[LLVMExtra_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "2d5a0044d6505f4771b5c82de87393f0c9741154" +git-tree-sha1 = "9c360e5ce980b88bb31a7b086dbb19469008154b" uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" -version = "0.0.8+0" +version = "0.0.10+0" [[LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] diff --git a/JuliaStream.jl/CUDA/Manifest.toml b/JuliaStream.jl/CUDA/Manifest.toml index af0acfc..ef6da14 100644 --- a/JuliaStream.jl/CUDA/Manifest.toml +++ b/JuliaStream.jl/CUDA/Manifest.toml @@ -52,9 +52,9 @@ version = "1.3.0" [[Compat]] deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] -git-tree-sha1 = "79b9563ef3f2cc5fc6d3046a5ee1a57c9de52495" +git-tree-sha1 = "727e463cfebd0c7b999bbf3e9e7e16f254b94193" uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "3.33.0" +version = "3.34.0" [[CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] @@ -116,15 +116,15 @@ version = "1.3.0" [[LLVM]] deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"] -git-tree-sha1 = "d6041ad706cf458b2c9f3e501152488a26451e9c" +git-tree-sha1 = "23a47d417a3cd9c2e73c854bac7dd4731c105ef7" uuid = "929cbde3-209d-540e-8aea-75f648917ca0" -version = "4.2.0" +version = "4.4.0" [[LLVMExtra_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "2d5a0044d6505f4771b5c82de87393f0c9741154" +git-tree-sha1 = "9c360e5ce980b88bb31a7b086dbb19469008154b" uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" -version = "0.0.8+0" +version = "0.0.10+0" [[LazyArtifacts]] deps = ["Artifacts", "Pkg"] @@ -231,9 +231,9 @@ uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143" version = "1.5.3" [[Reexport]] -git-tree-sha1 = "5f6c21241f0f655da3952fd60aa18477cf96c220" +git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" uuid = "189a3867-3050-52da-a836-e630ba90ab69" -version = "1.1.0" +version = "1.2.2" [[Requires]] deps = ["UUIDs"] diff --git a/JuliaStream.jl/KernelAbstractions/Manifest.toml b/JuliaStream.jl/KernelAbstractions/Manifest.toml index 25fd8d1..bfc562f 100644 --- a/JuliaStream.jl/KernelAbstractions/Manifest.toml +++ b/JuliaStream.jl/KernelAbstractions/Manifest.toml @@ -69,9 +69,9 @@ uuid = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57" version = "0.3.0" [[Cassette]] -git-tree-sha1 = "087e76b8d48c014112ba890892c33be42ad10504" +git-tree-sha1 = "b4b1d61ebbae2bc69a45e3a6b8439b4e411bc131" uuid = "7057c7e9-c182-5462-911a-8362d720325c" -version = "0.3.7" +version = "0.3.8" [[ChainRulesCore]] deps = ["Compat", "LinearAlgebra", "SparseArrays"] @@ -81,9 +81,9 @@ version = "1.3.0" [[Compat]] deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] -git-tree-sha1 = "79b9563ef3f2cc5fc6d3046a5ee1a57c9de52495" +git-tree-sha1 = "727e463cfebd0c7b999bbf3e9e7e16f254b94193" uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "3.33.0" +version = "3.34.0" [[CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] @@ -179,15 +179,15 @@ version = "0.7.0" [[LLVM]] deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"] -git-tree-sha1 = "d6041ad706cf458b2c9f3e501152488a26451e9c" +git-tree-sha1 = "23a47d417a3cd9c2e73c854bac7dd4731c105ef7" uuid = "929cbde3-209d-540e-8aea-75f648917ca0" -version = "4.2.0" +version = "4.4.0" [[LLVMExtra_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "2d5a0044d6505f4771b5c82de87393f0c9741154" +git-tree-sha1 = "9c360e5ce980b88bb31a7b086dbb19469008154b" uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" -version = "0.0.8+0" +version = "0.0.10+0" [[LazyArtifacts]] deps = ["Artifacts", "Pkg"] @@ -354,9 +354,9 @@ uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143" version = "1.5.3" [[Reexport]] -git-tree-sha1 = "5f6c21241f0f655da3952fd60aa18477cf96c220" +git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" uuid = "189a3867-3050-52da-a836-e630ba90ab69" -version = "1.1.0" +version = "1.2.2" [[Requires]] deps = ["UUIDs"] diff --git a/JuliaStream.jl/Manifest.toml b/JuliaStream.jl/Manifest.toml index 14f2029..c096e05 100644 --- a/JuliaStream.jl/Manifest.toml +++ b/JuliaStream.jl/Manifest.toml @@ -69,9 +69,9 @@ uuid = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57" version = "0.3.0" [[Cassette]] -git-tree-sha1 = "087e76b8d48c014112ba890892c33be42ad10504" +git-tree-sha1 = "b4b1d61ebbae2bc69a45e3a6b8439b4e411bc131" uuid = "7057c7e9-c182-5462-911a-8362d720325c" -version = "0.3.7" +version = "0.3.8" [[ChainRulesCore]] deps = ["Compat", "LinearAlgebra", "SparseArrays"] @@ -81,9 +81,9 @@ version = "1.3.0" [[Compat]] deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] -git-tree-sha1 = "79b9563ef3f2cc5fc6d3046a5ee1a57c9de52495" +git-tree-sha1 = "727e463cfebd0c7b999bbf3e9e7e16f254b94193" uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "3.33.0" +version = "3.34.0" [[CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] @@ -312,9 +312,9 @@ uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143" version = "1.5.3" [[Reexport]] -git-tree-sha1 = "5f6c21241f0f655da3952fd60aa18477cf96c220" +git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" uuid = "189a3867-3050-52da-a836-e630ba90ab69" -version = "1.1.0" +version = "1.2.2" [[Requires]] deps = ["UUIDs"] diff --git a/JuliaStream.jl/oneAPI/Manifest.toml b/JuliaStream.jl/oneAPI/Manifest.toml index 3aab94b..82c40fd 100644 --- a/JuliaStream.jl/oneAPI/Manifest.toml +++ b/JuliaStream.jl/oneAPI/Manifest.toml @@ -34,9 +34,9 @@ version = "1.3.0" [[Compat]] deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] -git-tree-sha1 = "79b9563ef3f2cc5fc6d3046a5ee1a57c9de52495" +git-tree-sha1 = "727e463cfebd0c7b999bbf3e9e7e16f254b94193" uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "3.33.0" +version = "3.34.0" [[CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] @@ -98,15 +98,15 @@ version = "1.3.0" [[LLVM]] deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"] -git-tree-sha1 = "d6041ad706cf458b2c9f3e501152488a26451e9c" +git-tree-sha1 = "23a47d417a3cd9c2e73c854bac7dd4731c105ef7" uuid = "929cbde3-209d-540e-8aea-75f648917ca0" -version = "4.2.0" +version = "4.4.0" [[LLVMExtra_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "2d5a0044d6505f4771b5c82de87393f0c9741154" +git-tree-sha1 = "9c360e5ce980b88bb31a7b086dbb19469008154b" uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" -version = "0.0.8+0" +version = "0.0.10+0" [[LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] diff --git a/JuliaStream.jl/src/AMDGPUStream.jl b/JuliaStream.jl/src/AMDGPUStream.jl index 8347637..4dd220c 100644 --- a/JuliaStream.jl/src/AMDGPUStream.jl +++ b/JuliaStream.jl/src/AMDGPUStream.jl @@ -55,7 +55,7 @@ function init_arrays!(data::ROCData{T}, _, init::Tuple{T,T,T}) where {T} end function copy!(data::ROCData{T}, _) where {T} - function kernel(a, c) + function kernel(a::AbstractArray{T}, c::AbstractArray{T}) i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x # only workgroupIdx starts at 1 @inbounds c[i] = a[i] return @@ -66,7 +66,7 @@ function copy!(data::ROCData{T}, _) where {T} end function mul!(data::ROCData{T}, _) where {T} - function kernel(b, c, scalar) + function kernel(b::AbstractArray{T}, c::AbstractArray{T}, scalar::T) i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x # only workgroupIdx starts at 1 @inbounds b[i] = scalar * c[i] return @@ -77,7 +77,7 @@ function mul!(data::ROCData{T}, _) where {T} end function add!(data::ROCData{T}, _) where {T} - function kernel(a, b, c) + function kernel(a::AbstractArray{T}, b::AbstractArray{T}, c::AbstractArray{T}) i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x # only workgroupIdx starts at 1 @inbounds c[i] = a[i] + b[i] return @@ -88,7 +88,7 @@ function add!(data::ROCData{T}, _) where {T} end function triad!(data::ROCData{T}, _) where {T} - function kernel(a, b, c, scalar) + function kernel(a::AbstractArray{T}, b::AbstractArray{T}, c::AbstractArray{T}, scalar::T) i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x # only workgroupIdx starts at 1 @inbounds a[i] = b[i] + (scalar * c[i]) return @@ -104,7 +104,7 @@ function triad!(data::ROCData{T}, _) where {T} end function nstream!(data::ROCData{T}, _) where {T} - function kernel(a, b, c, scalar) + function kernel(a::AbstractArray{T}, b::AbstractArray{T}, c::AbstractArray{T}, scalar::T) i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x # only workgroupIdx starts at 1 @inbounds a[i] += b[i] + scalar * c[i] return @@ -120,7 +120,7 @@ function nstream!(data::ROCData{T}, _) where {T} end function dot(data::ROCData{T}, _) where {T} - function kernel(a, b, size, partial) + function kernel(a::AbstractArray{T}, b::AbstractArray{T}, size::Int, partial::AbstractArray{T}) tb_sum = ROCDeviceArray((TBSize,), alloc_local(:reduce, T, TBSize)) local_i = workitemIdx().x @inbounds tb_sum[local_i] = 0.0 diff --git a/JuliaStream.jl/src/CUDAStream.jl b/JuliaStream.jl/src/CUDAStream.jl index b46b3c9..da3698e 100644 --- a/JuliaStream.jl/src/CUDAStream.jl +++ b/JuliaStream.jl/src/CUDAStream.jl @@ -51,7 +51,7 @@ function init_arrays!(data::CuData{T}, _, init::Tuple{T,T,T}) where {T} end function copy!(data::CuData{T}, _) where {T} - function kernel(a, c) + function kernel(a::AbstractArray{T}, c::AbstractArray{T}) i = (blockIdx().x - 1) * blockDim().x + threadIdx().x @inbounds c[i] = a[i] return @@ -61,7 +61,7 @@ function copy!(data::CuData{T}, _) where {T} end function mul!(data::CuData{T}, _) where {T} - function kernel(b, c, scalar) + function kernel(b::AbstractArray{T}, c::AbstractArray{T}, scalar::T) i = (blockIdx().x - 1) * blockDim().x + threadIdx().x @inbounds b[i] = scalar * c[i] return @@ -71,7 +71,7 @@ function mul!(data::CuData{T}, _) where {T} end function add!(data::CuData{T}, _) where {T} - function kernel(a, b, c) + function kernel(a::AbstractArray{T}, b::AbstractArray{T}, c::AbstractArray{T}) i = (blockIdx().x - 1) * blockDim().x + threadIdx().x @inbounds c[i] = a[i] + b[i] return @@ -81,7 +81,7 @@ function add!(data::CuData{T}, _) where {T} end function triad!(data::CuData{T}, _) where {T} - function kernel(a, b, c, scalar) + function kernel(a::AbstractArray{T}, b::AbstractArray{T}, c::AbstractArray{T}, scalar::T) i = (blockIdx().x - 1) * blockDim().x + threadIdx().x @inbounds a[i] = b[i] + (scalar * c[i]) return @@ -96,7 +96,7 @@ function triad!(data::CuData{T}, _) where {T} end function nstream!(data::CuData{T}, _) where {T} - function kernel(a, b, c, scalar) + function kernel(a::AbstractArray{T}, b::AbstractArray{T}, c::AbstractArray{T}, scalar::T) i = (blockIdx().x - 1) * blockDim().x + threadIdx().x @inbounds a[i] += b[i] + scalar * c[i] return @@ -112,7 +112,7 @@ end function dot(data::CuData{T}, _) where {T} # direct port of the reduction in CUDAStream.cu - function kernel(a, b, size, partial) + function kernel(a::AbstractArray{T}, b::AbstractArray{T}, size::Int, partial::AbstractArray{T}) tb_sum = @cuStaticSharedMem(T, TBSize) local_i = threadIdx().x @inbounds tb_sum[local_i] = 0.0 diff --git a/JuliaStream.jl/src/KernelAbstractionsStream.jl b/JuliaStream.jl/src/KernelAbstractionsStream.jl index 8cc3699..2b9d9ad 100644 --- a/JuliaStream.jl/src/KernelAbstractionsStream.jl +++ b/JuliaStream.jl/src/KernelAbstractionsStream.jl @@ -136,7 +136,7 @@ function init_arrays!( end function copy!(data::StreamData{T,C}, context::Context) where {T,C} - @kernel function kernel(@Const(a), c) + @kernel function kernel(@Const(a::AbstractArray{T}), c) i = @index(Global) @inbounds c[i] = a[i] end @@ -144,7 +144,7 @@ function copy!(data::StreamData{T,C}, context::Context) where {T,C} end function mul!(data::StreamData{T,C}, context::Context) where {T,C} - @kernel function kernel(b, @Const(c), scalar) + @kernel function kernel(b::AbstractArray{T}, @Const(c::AbstractArray{T}), scalar::T) i = @index(Global) @inbounds b[i] = scalar * c[i] end @@ -152,7 +152,7 @@ function mul!(data::StreamData{T,C}, context::Context) where {T,C} end function add!(data::StreamData{T,C}, context::Context) where {T,C} - @kernel function kernel(@Const(a), @Const(b), c) + @kernel function kernel(@Const(a::AbstractArray{T}), @Const(b::AbstractArray{T}), c) i = @index(Global) @inbounds c[i] = a[i] + b[i] end @@ -160,7 +160,7 @@ function add!(data::StreamData{T,C}, context::Context) where {T,C} end function triad!(data::StreamData{T,C}, context::Context) where {T,C} - @kernel function kernel(a, @Const(b), @Const(c), scalar) + @kernel function kernel(a::AbstractArray{T}, @Const(b::AbstractArray{T}), @Const(c), scalar::T) i = @index(Global) @inbounds a[i] = b[i] + (scalar * c[i]) end @@ -176,7 +176,7 @@ function triad!(data::StreamData{T,C}, context::Context) where {T,C} end function nstream!(data::StreamData{T,C}, context::Context) where {T,C} - @kernel function kernel(a, @Const(b), @Const(c), scalar) + @kernel function kernel(a::AbstractArray{T}, @Const(b::AbstractArray{T}), @Const(c), scalar::T) i = @index(Global) @inbounds a[i] += b[i] + scalar * c[i] end @@ -192,7 +192,7 @@ function nstream!(data::StreamData{T,C}, context::Context) where {T,C} end function dot(data::StreamData{T,C}, context::Context) where {T,C} - @kernel function kernel(@Const(a), @Const(b), size, partial) + @kernel function kernel(@Const(a::AbstractArray{T}), @Const(b::AbstractArray{T}), size::Int, partial::AbstractArray{T}) local_i = @index(Local) group_i = @index(Group) tb_sum = @localmem T TBSize diff --git a/JuliaStream.jl/src/oneAPIStream.jl b/JuliaStream.jl/src/oneAPIStream.jl index 1bc319d..83f100e 100644 --- a/JuliaStream.jl/src/oneAPIStream.jl +++ b/JuliaStream.jl/src/oneAPIStream.jl @@ -53,7 +53,7 @@ function init_arrays!(data::oneData{T}, _, init::Tuple{T,T,T}) where {T} end function copy!(data::oneData{T}, groupsize::Int) where {T} - function kernel(a, c) + function kernel(a::AbstractArray{T}, c::AbstractArray{T}) i = get_global_id() @inbounds c[i] = a[i] return @@ -66,7 +66,7 @@ function copy!(data::oneData{T}, groupsize::Int) where {T} end function mul!(data::oneData{T}, groupsize::Int) where {T} - function kernel(b, c, scalar) + function kernel(b::AbstractArray{T}, c::AbstractArray{T}, scalar::T) i = get_global_id() @inbounds b[i] = scalar * c[i] return @@ -80,7 +80,7 @@ function mul!(data::oneData{T}, groupsize::Int) where {T} end function add!(data::oneData{T}, groupsize::Int) where {T} - function kernel(a, b, c) + function kernel(a::AbstractArray{T}, b::AbstractArray{T}, c::AbstractArray{T}) i = get_global_id() @inbounds c[i] = a[i] + b[i] return @@ -94,7 +94,7 @@ function add!(data::oneData{T}, groupsize::Int) where {T} end function triad!(data::oneData{T}, groupsize::Int) where {T} - function kernel(a, b, c, scalar) + function kernel(a::AbstractArray{T}, b::AbstractArray{T}, c::AbstractArray{T}, scalar::T) i = get_global_id() @inbounds a[i] = b[i] + (scalar * c[i]) return @@ -109,7 +109,7 @@ function triad!(data::oneData{T}, groupsize::Int) where {T} end function nstream!(data::oneData{T}, groupsize::Int) where {T} - function kernel(a, b, c, scalar) + function kernel(a::AbstractArray{T}, b::AbstractArray{T}, c::AbstractArray{T}, scalar::T) i = get_global_id() @inbounds a[i] += b[i] + scalar * c[i] return @@ -124,7 +124,7 @@ function nstream!(data::oneData{T}, groupsize::Int) where {T} end function dot(data::oneData{T}, groupsize::Int) where {T} - function kernel(a, b, size, partial) + function kernel(a::AbstractArray{T}, b::AbstractArray{T}, size::Int, partial::AbstractArray{T}) wg_sum = @LocalMemory(T, (DotWGSize,)) li = get_local_id() @inbounds wg_sum[li] = 0.0