Allocate driver solution check vectors *after* the main computation

Each Stream implementation owns its own data, so the driver code
shouldn't allocate large arrays just before the Stream allocates
its own. On processors with
strong NUMA effects and smaller memory capacities per NUMA domain,
these checking vectors can result in the main arrays being
allocated in the wrong NUMA domain.

The fix is simply to delay the driver allocation until after the
computation has finished and we want to check the answers.

This commit only changes the driver; each model will be updated
in subsequent commits.

Fixes #80.
This commit is contained in:
Tom Deakin 2020-12-07 10:35:27 +00:00
parent f373927ce8
commit 829aa15da0
2 changed files with 15 additions and 13 deletions

View File

@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file.
### Changed ### Changed
- Default branch renamed from `master` to `main`. - Default branch renamed from `master` to `main`.
- Driver now delays allocating large checking vectors until after computation has finished.
- Use cl::sycl::id parameters instead of cl::sycl::item. - Use cl::sycl::id parameters instead of cl::sycl::item.
- Update local copy of OpenCL C++ header file. - Update local copy of OpenCL C++ header file.
- Ensure correct SYCL queue constructor with explicit async_handler. - Ensure correct SYCL queue constructor with explicit async_handler.

View File

@ -130,14 +130,6 @@ void run()
} }
// Create host vectors
std::vector<T> a(ARRAY_SIZE);
std::vector<T> b(ARRAY_SIZE);
std::vector<T> c(ARRAY_SIZE);
// Result of the Dot kernel
T sum;
Stream<T> *stream; Stream<T> *stream;
#if defined(CUDA) #if defined(CUDA)
@ -184,6 +176,9 @@ void run()
stream->init_arrays(startA, startB, startC); stream->init_arrays(startA, startB, startC);
// Result of the Dot kernel
T sum;
// List of times // List of times
std::vector<std::vector<double>> timings(5); std::vector<std::vector<double>> timings(5);
@ -226,6 +221,11 @@ void run()
} }
// Check solutions // Check solutions
// Create host vectors
std::vector<T> a(ARRAY_SIZE);
std::vector<T> b(ARRAY_SIZE);
std::vector<T> c(ARRAY_SIZE);
stream->read_arrays(a, b, c); stream->read_arrays(a, b, c);
check_solution<T>(num_times, a, b, c, sum); check_solution<T>(num_times, a, b, c, sum);
@ -338,11 +338,6 @@ void run_triad()
std::cout.precision(ss); std::cout.precision(ss);
} }
// Create host vectors
std::vector<T> a(ARRAY_SIZE);
std::vector<T> b(ARRAY_SIZE);
std::vector<T> c(ARRAY_SIZE);
Stream<T> *stream; Stream<T> *stream;
#if defined(CUDA) #if defined(CUDA)
@ -399,7 +394,13 @@ void run_triad()
double runtime = std::chrono::duration_cast<std::chrono::duration<double> >(t2 - t1).count(); double runtime = std::chrono::duration_cast<std::chrono::duration<double> >(t2 - t1).count();
// Check solutions // Check solutions
// Create host vectors
std::vector<T> a(ARRAY_SIZE);
std::vector<T> b(ARRAY_SIZE);
std::vector<T> c(ARRAY_SIZE);
T sum = 0.0; T sum = 0.0;
stream->read_arrays(a, b, c); stream->read_arrays(a, b, c);
check_solution<T>(num_times, a, b, c, sum); check_solution<T>(num_times, a, b, c, sum);