Use @simd instead of @fastmath for CPU reduction

Tom Lin 2021-08-28 11:39:08 +01:00
parent 41f1767365
commit 78b52a496c


@@ -90,14 +90,14 @@ function dot(data::VectorData{T}, _) where {T}
     partial = Vector{T}(undef, Threads.nthreads())
     static_par_ranged(data.size, Threads.nthreads()) do group, startidx, endidx
         acc = zero(T)
-        @fastmath for i = startidx:endidx
+        @simd for i = startidx:endidx
            @inbounds acc += data.a[i] * data.b[i]
        end
        @inbounds partial[group] = acc
    end
    return sum(partial)
    # This doesn't do well on aarch64 because of the excessive Threads.threadid() ccall
-   # and inhibited vectorisation from the lack of @fastmath
+   # and inhibited vectorisation from the lack of @simd
    # partial = zeros(T, Threads.nthreads())
    # Threads.@threads for i = 1:data.size
    #     @inbounds partial[Threads.threadid()] += (data.a[i] * data.b[i])
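For context on the change: @fastmath relaxes floating-point semantics broadly (reassociation, approximations, assuming no NaNs/Infs), whereas @simd only asserts that loop iterations are independent and that the reduction may be reordered for vectorisation, which is the narrower guarantee this dot product needs. Below is a minimal, self-contained sketch of the same pattern, assuming a plain Threads.@threads chunking loop in place of the repository's static_par_ranged helper; simd_dot and the chunk arithmetic are illustrative names, not code from this repo.

# Sketch only: chunked, threaded dot product where each thread reduces its own
# contiguous index range with @simd and writes a single partial sum.
function simd_dot(a::Vector{T}, b::Vector{T}) where {T}
    nt = Threads.nthreads()
    partial = Vector{T}(undef, nt)
    len, rem = divrem(length(a), nt)
    Threads.@threads for t in 1:nt
        # Split 1:length(a) into nt nearly equal contiguous chunks.
        lo = (t - 1) * len + min(t - 1, rem) + 1
        hi = t * len + min(t, rem)
        acc = zero(T)
        @simd for i in lo:hi
            @inbounds acc += a[i] * b[i]
        end
        @inbounds partial[t] = acc   # one write per chunk, no data race
    end
    return sum(partial)
end

# Usage check (illustrative):
# x, y = rand(Float32, 1_000_000), rand(Float32, 1_000_000)
# simd_dot(x, y) ≈ sum(x .* y)

Writing each thread's result into its own slot of partial (rather than accumulating into partial[Threads.threadid()] inside the loop, as in the commented-out version) avoids the repeated threadid() lookups and keeps the inner loop a pure @simd reduction.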