refactored n_tiles into preprocessor macro
This commit is contained in:
parent
26279688d1
commit
8509917dff
19
HCStream.cpp
19
HCStream.cpp
@ -1,4 +1,4 @@
|
|||||||
// Copyright (c) 2015-16 Peter Steinbach, MPI CBG Scientific Computing Facility
|
// Copyright (c) 2017 Peter Steinbach, MPI CBG Scientific Computing Facility
|
||||||
//
|
//
|
||||||
// For full license terms please see the LICENSE file distributed with this
|
// For full license terms please see the LICENSE file distributed with this
|
||||||
// source code
|
// source code
|
||||||
@ -10,10 +10,17 @@
|
|||||||
#include <locale>
|
#include <locale>
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
|
|
||||||
|
// The default VIRTUALTILESIZE was obtained through experimentation using a Fiji R9 Nano with ROCm 1.6-115
|
||||||
#ifndef VIRTUALTILESIZE
|
#ifndef VIRTUALTILESIZE
|
||||||
#define VIRTUALTILESIZE 1024
|
#define VIRTUALTILESIZE 256
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// The default NTILES was obtained through experimentation using a Fiji R9 Nano with ROCm 1.6-115
|
||||||
|
#ifndef NTILES
|
||||||
|
#define NTILES 2048
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
std::string getDeviceName(const hc::accelerator& _acc)
|
std::string getDeviceName(const hc::accelerator& _acc)
|
||||||
{
|
{
|
||||||
std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter;
|
std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter;
|
||||||
@ -210,16 +217,14 @@ T HCStream<T>::dot()
|
|||||||
// ->Samples/CaseStudies/Reduction
|
// ->Samples/CaseStudies/Reduction
|
||||||
// ->CascadingReduction.h
|
// ->CascadingReduction.h
|
||||||
|
|
||||||
static constexpr std::size_t n_tiles = 64;
|
|
||||||
|
|
||||||
const auto& view_a = this->d_a;
|
const auto& view_a = this->d_a;
|
||||||
const auto& view_b = this->d_b;
|
const auto& view_b = this->d_b;
|
||||||
|
|
||||||
auto ex = view_a.get_extent();
|
auto ex = view_a.get_extent();
|
||||||
const auto tiled_ex = hc::extent<1>(n_tiles * VIRTUALTILESIZE).tile(VIRTUALTILESIZE);
|
const auto tiled_ex = hc::extent<1>(NTILES * VIRTUALTILESIZE).tile(VIRTUALTILESIZE);
|
||||||
const auto domain_sz = tiled_ex.size();
|
const auto domain_sz = tiled_ex.size();
|
||||||
|
|
||||||
hc::array<T, 1> partial(n_tiles);
|
hc::array<T, 1> partial(NTILES);
|
||||||
|
|
||||||
hc::parallel_for_each(tiled_ex,
|
hc::parallel_for_each(tiled_ex,
|
||||||
[=,
|
[=,
|
||||||
@ -257,7 +262,7 @@ T HCStream<T>::dot()
|
|||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<T> h_partial(n_tiles,0);
|
std::vector<T> h_partial(NTILES,0);
|
||||||
hc::copy(partial,h_partial.begin());
|
hc::copy(partial,h_partial.begin());
|
||||||
|
|
||||||
T result = std::accumulate(h_partial.begin(), h_partial.end(), 0.);
|
T result = std::accumulate(h_partial.begin(), h_partial.end(), 0.);
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user