Use @simd instead of @fastmath for CPU reduction
This commit is contained in:
parent
41f1767365
commit
78b52a496c
@ -90,14 +90,14 @@ function dot(data::VectorData{T}, _) where {T}
|
|||||||
partial = Vector{T}(undef, Threads.nthreads())
|
partial = Vector{T}(undef, Threads.nthreads())
|
||||||
static_par_ranged(data.size, Threads.nthreads()) do group, startidx, endidx
|
static_par_ranged(data.size, Threads.nthreads()) do group, startidx, endidx
|
||||||
acc = zero(T)
|
acc = zero(T)
|
||||||
@fastmath for i = startidx:endidx
|
@simd for i = startidx:endidx
|
||||||
@inbounds acc += data.a[i] * data.b[i]
|
@inbounds acc += data.a[i] * data.b[i]
|
||||||
end
|
end
|
||||||
@inbounds partial[group] = acc
|
@inbounds partial[group] = acc
|
||||||
end
|
end
|
||||||
return sum(partial)
|
return sum(partial)
|
||||||
# This doesn't do well on aarch64 because of the excessive Threads.threadid() ccall
|
# This doesn't do well on aarch64 because of the excessive Threads.threadid() ccall
|
||||||
# and inhibited vectorisation from the lack of @fastmath
|
# and inhibited vectorisation from the lack of @simd
|
||||||
# partial = zeros(T, Threads.nthreads())
|
# partial = zeros(T, Threads.nthreads())
|
||||||
# Threads.@threads for i = 1:data.size
|
# Threads.@threads for i = 1:data.size
|
||||||
# @inbounds partial[Threads.threadid()] += (data.a[i] * data.b[i])
|
# @inbounds partial[Threads.threadid()] += (data.a[i] * data.b[i])
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user