refactored n_tiles into preprocessor macro

This commit is contained in:
Peter Steinbach 2017-07-31 14:20:59 +02:00
parent 26279688d1
commit 8509917dff

View File

@ -1,4 +1,4 @@
// Copyright (c) 2015-16 Peter Steinbach, MPI CBG Scientific Computing Facility // Copyright (c) 2017 Peter Steinbach, MPI CBG Scientific Computing Facility
// //
// For full license terms please see the LICENSE file distributed with this // For full license terms please see the LICENSE file distributed with this
// source code // source code
@ -10,10 +10,17 @@
#include <locale> #include <locale>
#include <numeric> #include <numeric>
//specific sizes were obtained through experimentation using a Fiji R9 Nano with rocm 1.6-115
#ifndef VIRTUALTILESIZE #ifndef VIRTUALTILESIZE
#define VIRTUALTILESIZE 1024 #define VIRTUALTILESIZE 256
#endif #endif
// NTILES: tile count used by HCStream<T>::dot(). It is multiplied by
// VIRTUALTILESIZE to form the tiled launch extent, and it sizes both the
// device-side `partial` array and the host-side `h_partial` vector.
// A definition supplied before this point (e.g. on the compiler command
// line) takes precedence over the default below.
// The default was obtained through experimentation using a Fiji R9 Nano
// with rocm 1.6-115.
#if !defined(NTILES)
#define NTILES 2048
#endif
std::string getDeviceName(const hc::accelerator& _acc) std::string getDeviceName(const hc::accelerator& _acc)
{ {
std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter; std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter;
@ -210,16 +217,14 @@ T HCStream<T>::dot()
// ->Samples/CaseStudies/Reduction // ->Samples/CaseStudies/Reduction
// ->CascadingReduction.h // ->CascadingReduction.h
static constexpr std::size_t n_tiles = 64;
const auto& view_a = this->d_a; const auto& view_a = this->d_a;
const auto& view_b = this->d_b; const auto& view_b = this->d_b;
auto ex = view_a.get_extent(); auto ex = view_a.get_extent();
const auto tiled_ex = hc::extent<1>(n_tiles * VIRTUALTILESIZE).tile(VIRTUALTILESIZE); const auto tiled_ex = hc::extent<1>(NTILES * VIRTUALTILESIZE).tile(VIRTUALTILESIZE);
const auto domain_sz = tiled_ex.size(); const auto domain_sz = tiled_ex.size();
hc::array<T, 1> partial(n_tiles); hc::array<T, 1> partial(NTILES);
hc::parallel_for_each(tiled_ex, hc::parallel_for_each(tiled_ex,
[=, [=,
@ -257,7 +262,7 @@ T HCStream<T>::dot()
throw; throw;
} }
std::vector<T> h_partial(n_tiles,0); std::vector<T> h_partial(NTILES,0);
hc::copy(partial,h_partial.begin()); hc::copy(partial,h_partial.begin());
T result = std::accumulate(h_partial.begin(), h_partial.end(), 0.); T result = std::accumulate(h_partial.begin(), h_partial.end(), 0.);