Add comment about blockIdx/workgroupIdx in Julia
This commit is contained in:
parent
2e957d3f60
commit
7c1e04a42b
@ -64,7 +64,7 @@ end
|
|||||||
|
|
||||||
function copy!(data::ROCData{T}) where {T}
|
function copy!(data::ROCData{T}) where {T}
|
||||||
function kernel(a, c)
|
function kernel(a, c)
|
||||||
i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x
|
i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x # both indices are 1-based, so only workgroupIdx is offset by 1
|
||||||
@inbounds c[i] = a[i]
|
@inbounds c[i] = a[i]
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
@ -76,7 +76,7 @@ end
|
|||||||
|
|
||||||
function mul!(data::ROCData{T}) where {T}
|
function mul!(data::ROCData{T}) where {T}
|
||||||
function kernel(b, c, scalar)
|
function kernel(b, c, scalar)
|
||||||
i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x
|
i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x # both indices are 1-based, so only workgroupIdx is offset by 1
|
||||||
@inbounds b[i] = scalar * c[i]
|
@inbounds b[i] = scalar * c[i]
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
@ -88,7 +88,7 @@ end
|
|||||||
|
|
||||||
function add!(data::ROCData{T}) where {T}
|
function add!(data::ROCData{T}) where {T}
|
||||||
function kernel(a, b, c)
|
function kernel(a, b, c)
|
||||||
i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x
|
i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x # both indices are 1-based, so only workgroupIdx is offset by 1
|
||||||
@inbounds c[i] = a[i] + b[i]
|
@inbounds c[i] = a[i] + b[i]
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
@ -100,7 +100,7 @@ end
|
|||||||
|
|
||||||
function triad!(data::ROCData{T}) where {T}
|
function triad!(data::ROCData{T}) where {T}
|
||||||
function kernel(a, b, c, scalar)
|
function kernel(a, b, c, scalar)
|
||||||
i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x
|
i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x # both indices are 1-based, so only workgroupIdx is offset by 1
|
||||||
@inbounds a[i] = b[i] + (scalar * c[i])
|
@inbounds a[i] = b[i] + (scalar * c[i])
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
@ -117,7 +117,7 @@ end
|
|||||||
|
|
||||||
function nstream!(data::ROCData{T}) where {T}
|
function nstream!(data::ROCData{T}) where {T}
|
||||||
function kernel(a, b, c, scalar)
|
function kernel(a, b, c, scalar)
|
||||||
i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x
|
i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x # both indices are 1-based, so only workgroupIdx is offset by 1
|
||||||
@inbounds a[i] += b[i] + scalar * c[i]
|
@inbounds a[i] += b[i] + scalar * c[i]
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
@ -139,7 +139,7 @@ function dot(data::ROCData{T}) where {T}
|
|||||||
@inbounds tb_sum[local_i] = 0.0
|
@inbounds tb_sum[local_i] = 0.0
|
||||||
|
|
||||||
# do dot first
|
# do dot first
|
||||||
i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x
|
i = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x # both indices are 1-based, so only workgroupIdx is offset by 1
|
||||||
while i <= size
|
while i <= size
|
||||||
@inbounds tb_sum[local_i] += a[i] * b[i]
|
@inbounds tb_sum[local_i] += a[i] * b[i]
|
||||||
i += TBSize * DotBlocks # XXX don't use (workgroupDim().x * gridDimWG().x) here
|
i += TBSize * DotBlocks # XXX don't use (workgroupDim().x * gridDimWG().x) here
|
||||||
|
|||||||
@ -51,7 +51,7 @@ end
|
|||||||
|
|
||||||
function copy!(data::CuData{T}) where {T}
|
function copy!(data::CuData{T}) where {T}
|
||||||
function kernel(a, c)
|
function kernel(a, c)
|
||||||
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
|
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # both indices are 1-based, so only blockIdx is offset by 1
|
||||||
@inbounds c[i] = a[i]
|
@inbounds c[i] = a[i]
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
@ -61,7 +61,7 @@ end
|
|||||||
|
|
||||||
function mul!(data::CuData{T}) where {T}
|
function mul!(data::CuData{T}) where {T}
|
||||||
function kernel(b, c, scalar)
|
function kernel(b, c, scalar)
|
||||||
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
|
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # both indices are 1-based, so only blockIdx is offset by 1
|
||||||
@inbounds b[i] = scalar * c[i]
|
@inbounds b[i] = scalar * c[i]
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
@ -71,7 +71,7 @@ end
|
|||||||
|
|
||||||
function add!(data::CuData{T}) where {T}
|
function add!(data::CuData{T}) where {T}
|
||||||
function kernel(a, b, c)
|
function kernel(a, b, c)
|
||||||
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
|
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # both indices are 1-based, so only blockIdx is offset by 1
|
||||||
@inbounds c[i] = a[i] + b[i]
|
@inbounds c[i] = a[i] + b[i]
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
@ -81,7 +81,7 @@ end
|
|||||||
|
|
||||||
function triad!(data::CuData{T}) where {T}
|
function triad!(data::CuData{T}) where {T}
|
||||||
function kernel(a, b, c, scalar)
|
function kernel(a, b, c, scalar)
|
||||||
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
|
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # both indices are 1-based, so only blockIdx is offset by 1
|
||||||
@inbounds a[i] = b[i] + (scalar * c[i])
|
@inbounds a[i] = b[i] + (scalar * c[i])
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
@ -96,7 +96,7 @@ end
|
|||||||
|
|
||||||
function nstream!(data::CuData{T}) where {T}
|
function nstream!(data::CuData{T}) where {T}
|
||||||
function kernel(a, b, c, scalar)
|
function kernel(a, b, c, scalar)
|
||||||
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
|
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # both indices are 1-based, so only blockIdx is offset by 1
|
||||||
@inbounds a[i] += b[i] + scalar * c[i]
|
@inbounds a[i] += b[i] + scalar * c[i]
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
@ -117,7 +117,7 @@ function dot(data::CuData{T}) where {T}
|
|||||||
@inbounds tb_sum[local_i] = 0.0
|
@inbounds tb_sum[local_i] = 0.0
|
||||||
|
|
||||||
# do dot first
|
# do dot first
|
||||||
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
|
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # both indices are 1-based, so only blockIdx is offset by 1
|
||||||
while i <= size
|
while i <= size
|
||||||
@inbounds tb_sum[local_i] += a[i] * b[i]
|
@inbounds tb_sum[local_i] += a[i] * b[i]
|
||||||
i += blockDim().x * gridDim().x
|
i += blockDim().x * gridDim().x
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user