diff --git a/HCStream.cpp b/HCStream.cpp index 47602fc..b1b4a9b 100644 --- a/HCStream.cpp +++ b/HCStream.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2015-16 Peter Steinbach, MPI CBG Scientific Computing Facility +// Copyright (c) 2017 Peter Steinbach, MPI CBG Scientific Computing Facility // // For full license terms please see the LICENSE file distributed with this // source code @@ -10,10 +10,17 @@ #include #include +// VIRTUALTILESIZE: tile size passed to extent::tile() in dot(); default obtained through experimentation using a Fiji R9 Nano with ROCm 1.6-115, define it before this point to override #ifndef VIRTUALTILESIZE -#define VIRTUALTILESIZE 1024 +#define VIRTUALTILESIZE 256 #endif +// NTILES: number of tiles in dot()'s launch extent and length of the partial-result array it copies back and accumulates; default obtained through experimentation using a Fiji R9 Nano with ROCm 1.6-115, define it before this point to override +#ifndef NTILES +#define NTILES 2048 +#endif + + std::string getDeviceName(const hc::accelerator& _acc) { std::wstring_convert, wchar_t> converter; @@ -210,16 +217,14 @@ T HCStream::dot() // ->Samples/CaseStudies/Reduction // ->CascadingReduction.h - static constexpr std::size_t n_tiles = 64; - const auto& view_a = this->d_a; const auto& view_b = this->d_b; auto ex = view_a.get_extent(); - const auto tiled_ex = hc::extent<1>(n_tiles * VIRTUALTILESIZE).tile(VIRTUALTILESIZE); + const auto tiled_ex = hc::extent<1>(NTILES * VIRTUALTILESIZE).tile(VIRTUALTILESIZE); const auto domain_sz = tiled_ex.size(); - hc::array partial(n_tiles); + hc::array partial(NTILES); hc::parallel_for_each(tiled_ex, [=, @@ -257,7 +262,7 @@ T HCStream::dot() throw; } - std::vector h_partial(n_tiles,0); + std::vector h_partial(NTILES,0); hc::copy(partial,h_partial.begin()); T result = std::accumulate(h_partial.begin(), h_partial.end(), 0.);