refactored n_tiles into preprocessor macro

2017-07-31 14:20:59 +02:00 · 2017-07-31 14:20:59 +02:00 · 8509917dff
commit 8509917dff
parent 26279688d1
1 changed files with 12 additions and 7 deletions
--- a/HCStream.cpp
+++ b/HCStream.cpp
@ -1,4 +1,4 @@
-// Copyright (c) 2015-16 Peter Steinbach, MPI CBG Scientific Computing Facility
+// Copyright (c) 2017 Peter Steinbach, MPI CBG Scientific Computing Facility
 //
 // For full license terms please see the LICENSE file distributed with this
 // source code
@ -10,10 +10,17 @@
 #include <locale>
 #include <numeric>

+// Default tile size, overridable at compile time; this value was found by experimentation on a Fiji R9 Nano with ROCm 1.6-115.
 #ifndef VIRTUALTILESIZE
-#define VIRTUALTILESIZE 1024
+#define VIRTUALTILESIZE 256
 #endif

+// Default number of tiles used by dot(), overridable at compile time; this value was found by experimentation on a Fiji R9 Nano with ROCm 1.6-115.
+#ifndef NTILES
+#define NTILES 2048
+#endif
+
+
 std::string getDeviceName(const hc::accelerator& _acc)
 {
  std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter;
@ -210,16 +217,14 @@ T HCStream<T>::dot()
    // ->Samples/CaseStudies/Reduction
    // ->CascadingReduction.h

-    static constexpr std::size_t n_tiles = 64;
-
    const auto& view_a = this->d_a;
    const auto& view_b = this->d_b;

    auto ex = view_a.get_extent();
-    const auto tiled_ex = hc::extent<1>(n_tiles * VIRTUALTILESIZE).tile(VIRTUALTILESIZE);
+    const auto tiled_ex = hc::extent<1>(NTILES * VIRTUALTILESIZE).tile(VIRTUALTILESIZE);
    const auto domain_sz = tiled_ex.size();

-    hc::array<T, 1> partial(n_tiles);
+    hc::array<T, 1> partial(NTILES);

    hc::parallel_for_each(tiled_ex,
                          [=,
@ -257,7 +262,7 @@ T HCStream<T>::dot()
        throw;
    }

-    std::vector<T> h_partial(n_tiles,0);
+    std::vector<T> h_partial(NTILES,0);
    hc::copy(partial,h_partial.begin());

    T result = std::accumulate(h_partial.begin(), h_partial.end(), 0.);