Inline blocks in CUDAStream
This commit is contained in:
parent
418315543c
commit
2e957d3f60
@ -10,10 +10,6 @@ function devices()
|
|||||||
map(d -> "$(CUDA.name(d)) ($(repr(d)))", CUDA.devices())
|
map(d -> "$(CUDA.name(d)) ($(repr(d)))", CUDA.devices())
|
||||||
end
|
end
|
||||||
|
|
||||||
function blocks(data::CuData{T})::Int where {T}
|
|
||||||
return data.size ÷ TBSize
|
|
||||||
end
|
|
||||||
|
|
||||||
function make_stream(
|
function make_stream(
|
||||||
arraysize::Int,
|
arraysize::Int,
|
||||||
scalar::T,
|
scalar::T,
|
||||||
@ -42,7 +38,7 @@ function make_stream(
|
|||||||
)
|
)
|
||||||
if !silent
|
if !silent
|
||||||
println("Using CUDA device: $(CUDA.name(selected)) ($(repr(selected)))")
|
println("Using CUDA device: $(CUDA.name(selected)) ($(repr(selected)))")
|
||||||
println("Kernel parameters: <<<$(blocks(data)),$(TBSize)>>>")
|
println("Kernel parameters: <<<$(data.size ÷ TBSize),$(TBSize)>>>")
|
||||||
end
|
end
|
||||||
return data
|
return data
|
||||||
end
|
end
|
||||||
@ -59,7 +55,7 @@ function copy!(data::CuData{T}) where {T}
|
|||||||
@inbounds c[i] = a[i]
|
@inbounds c[i] = a[i]
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
@cuda blocks = blocks(data) threads = TBSize kernel(data.a, data.c)
|
@cuda blocks = data.size ÷ TBSize threads = TBSize kernel(data.a, data.c)
|
||||||
CUDA.synchronize()
|
CUDA.synchronize()
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -69,7 +65,7 @@ function mul!(data::CuData{T}) where {T}
|
|||||||
@inbounds b[i] = scalar * c[i]
|
@inbounds b[i] = scalar * c[i]
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
@cuda blocks = blocks(data) threads = TBSize kernel(data.b, data.c, data.scalar)
|
@cuda blocks = data.size ÷ TBSize threads = TBSize kernel(data.b, data.c, data.scalar)
|
||||||
CUDA.synchronize()
|
CUDA.synchronize()
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -79,7 +75,7 @@ function add!(data::CuData{T}) where {T}
|
|||||||
@inbounds c[i] = a[i] + b[i]
|
@inbounds c[i] = a[i] + b[i]
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
@cuda blocks = blocks(data) threads = TBSize kernel(data.a, data.b, data.c)
|
@cuda blocks = data.size ÷ TBSize threads = TBSize kernel(data.a, data.b, data.c)
|
||||||
CUDA.synchronize()
|
CUDA.synchronize()
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -89,7 +85,12 @@ function triad!(data::CuData{T}) where {T}
|
|||||||
@inbounds a[i] = b[i] + (scalar * c[i])
|
@inbounds a[i] = b[i] + (scalar * c[i])
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
@cuda blocks = blocks(data) threads = TBSize kernel(data.a, data.b, data.c, data.scalar)
|
@cuda blocks = data.size ÷ TBSize threads = TBSize kernel(
|
||||||
|
data.a,
|
||||||
|
data.b,
|
||||||
|
data.c,
|
||||||
|
data.scalar,
|
||||||
|
)
|
||||||
CUDA.synchronize()
|
CUDA.synchronize()
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -99,7 +100,12 @@ function nstream!(data::CuData{T}) where {T}
|
|||||||
@inbounds a[i] += b[i] + scalar * c[i]
|
@inbounds a[i] += b[i] + scalar * c[i]
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
@cuda blocks = blocks(data) threads = TBSize kernel(data.a, data.b, data.c, data.scalar)
|
@cuda blocks = data.size ÷ TBSize threads = TBSize kernel(
|
||||||
|
data.a,
|
||||||
|
data.b,
|
||||||
|
data.c,
|
||||||
|
data.scalar,
|
||||||
|
)
|
||||||
CUDA.synchronize()
|
CUDA.synchronize()
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user