Inline AMDGPU's hard_wait
Show the selected implementation (via PROGRAM_FILE) instead of the constant "threaded"
This commit is contained in:
parent
d799535c96
commit
4e6c56729b
@ -56,11 +56,6 @@ function make_stream(
|
|||||||
return data
|
return data
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
    hard_wait(kernel)

Forward `kernel` to `AMDGPU.wait` with soft waiting disabled and return
whatever that call returns.
"""
# soft wait causes HSA_REFCOUNT overflow issues, hence `soft = false`
hard_wait(kernel) = AMDGPU.wait(kernel; soft = false)
|
|
||||||
|
|
||||||
function init_arrays!(data::ROCData{T}, init::Tuple{T,T,T}) where {T}
|
function init_arrays!(data::ROCData{T}, init::Tuple{T,T,T}) where {T}
|
||||||
AMDGPU.fill!(data.a, init[1])
|
AMDGPU.fill!(data.a, init[1])
|
||||||
AMDGPU.fill!(data.b, init[2])
|
AMDGPU.fill!(data.b, init[2])
|
||||||
@ -73,7 +68,10 @@ function copy!(data::ROCData{T}) where {T}
|
|||||||
@inbounds c[i] = a[i]
|
@inbounds c[i] = a[i]
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
hard_wait(@roc groupsize = TBSize gridsize = gridsize(data) kernel(data.a, data.c))
|
AMDGPU.wait(
|
||||||
|
soft = false, # soft wait causes HSA_REFCOUNT overflow issues
|
||||||
|
@roc groupsize = TBSize gridsize = gridsize(data) kernel(data.a, data.c)
|
||||||
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
function mul!(data::ROCData{T}) where {T}
|
function mul!(data::ROCData{T}) where {T}
|
||||||
@ -82,7 +80,8 @@ function mul!(data::ROCData{T}) where {T}
|
|||||||
@inbounds b[i] = scalar * c[i]
|
@inbounds b[i] = scalar * c[i]
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
hard_wait(
|
AMDGPU.wait(
|
||||||
|
soft = false, # soft wait causes HSA_REFCOUNT overflow issues
|
||||||
@roc groupsize = TBSize gridsize = gridsize(data) kernel(data.b, data.c, data.scalar)
|
@roc groupsize = TBSize gridsize = gridsize(data) kernel(data.b, data.c, data.scalar)
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
@ -93,7 +92,8 @@ function add!(data::ROCData{T}) where {T}
|
|||||||
@inbounds c[i] = a[i] + b[i]
|
@inbounds c[i] = a[i] + b[i]
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
hard_wait(
|
AMDGPU.wait(
|
||||||
|
soft = false, # soft wait causes HSA_REFCOUNT overflow issues
|
||||||
@roc groupsize = TBSize gridsize = gridsize(data) kernel(data.a, data.b, data.c)
|
@roc groupsize = TBSize gridsize = gridsize(data) kernel(data.a, data.b, data.c)
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
@ -104,7 +104,8 @@ function triad!(data::ROCData{T}) where {T}
|
|||||||
@inbounds a[i] = b[i] + (scalar * c[i])
|
@inbounds a[i] = b[i] + (scalar * c[i])
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
hard_wait(
|
AMDGPU.wait(
|
||||||
|
soft = false, # soft wait causes HSA_REFCOUNT overflow issues
|
||||||
@roc groupsize = TBSize gridsize = gridsize(data) kernel(
|
@roc groupsize = TBSize gridsize = gridsize(data) kernel(
|
||||||
data.a,
|
data.a,
|
||||||
data.b,
|
data.b,
|
||||||
@ -120,7 +121,8 @@ function nstream!(data::ROCData{T}) where {T}
|
|||||||
@inbounds a[i] += b[i] + scalar * c[i]
|
@inbounds a[i] += b[i] + scalar * c[i]
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
hard_wait(
|
AMDGPU.wait(
|
||||||
|
soft = false, # soft wait causes HSA_REFCOUNT overflow issues
|
||||||
@roc groupsize = TBSize gridsize = gridsize(data) kernel(
|
@roc groupsize = TBSize gridsize = gridsize(data) kernel(
|
||||||
data.a,
|
data.a,
|
||||||
data.b,
|
data.b,
|
||||||
@ -160,7 +162,8 @@ function dot(data::ROCData{T}) where {T}
|
|||||||
return
|
return
|
||||||
end
|
end
|
||||||
partial_sum = ROCArray{T}(undef, DotBlocks)
|
partial_sum = ROCArray{T}(undef, DotBlocks)
|
||||||
hard_wait(
|
AMDGPU.wait(
|
||||||
|
soft = false, # soft wait causes HSA_REFCOUNT overflow issues
|
||||||
@roc groupsize = TBSize gridsize = TBSize * DotBlocks kernel(
|
@roc groupsize = TBSize gridsize = TBSize * DotBlocks kernel(
|
||||||
data.a,
|
data.a,
|
||||||
data.b,
|
data.b,
|
||||||
|
|||||||
@ -99,7 +99,6 @@ end
|
|||||||
|
|
||||||
@with_kw mutable struct Config
|
@with_kw mutable struct Config
|
||||||
list::Bool = false
|
list::Bool = false
|
||||||
impl::String = "threaded"
|
|
||||||
device::Int = 1
|
device::Int = 1
|
||||||
numtimes::Int = 100
|
numtimes::Int = 100
|
||||||
arraysize::Int = 33554432
|
arraysize::Int = 33554432
|
||||||
@ -201,7 +200,7 @@ function main()
|
|||||||
if !config.csv
|
if !config.csv
|
||||||
println("""BabelStream
|
println("""BabelStream
|
||||||
Version: $Version
|
Version: $Version
|
||||||
Implementation: Julia; $(config.impl)""")
|
Implementation: Julia; $(PROGRAM_FILE)""")
|
||||||
println("Running kernels $(config.numtimes) times")
|
println("Running kernels $(config.numtimes) times")
|
||||||
if benchmark == Triad
|
if benchmark == Triad
|
||||||
println("Number of elements: $(config.arraysize)")
|
println("Number of elements: $(config.arraysize)")
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user