Allocate driver solution check vectors *after* the main computation
Each Stream implementation owns its own data, so the driver code shouldn't allocate large arrays beforehand. On processors with strong NUMA effects and smaller memory capacities per NUMA domain, these checking vectors can result in the main arrays being allocated in the wrong NUMA domain. The fix is simply to delay the driver allocation until after the computation has finished and we want to check the answers. This commit only changes the driver; each model will be updated in subsequent commits. Fixes #80.
This commit is contained in:
parent
f373927ce8
commit
829aa15da0
@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file.
|
|||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
- Default branch renamed from `master` to `main`.
|
- Default branch renamed from `master` to `main`.
|
||||||
|
- Driver now delays allocating large checking vectors until after computation has finished.
|
||||||
- Use cl::sycl::id parameters instead of cl::sycl::item.
|
- Use cl::sycl::id parameters instead of cl::sycl::item.
|
||||||
- Update local copy of OpenCL C++ header file.
|
- Update local copy of OpenCL C++ header file.
|
||||||
- Ensure correct SYCL queue constructor with explicit async_handler.
|
- Ensure correct SYCL queue constructor with explicit async_handler.
|
||||||
|
|||||||
27
main.cpp
27
main.cpp
@ -130,14 +130,6 @@ void run()
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create host vectors
|
|
||||||
std::vector<T> a(ARRAY_SIZE);
|
|
||||||
std::vector<T> b(ARRAY_SIZE);
|
|
||||||
std::vector<T> c(ARRAY_SIZE);
|
|
||||||
|
|
||||||
// Result of the Dot kernel
|
|
||||||
T sum;
|
|
||||||
|
|
||||||
Stream<T> *stream;
|
Stream<T> *stream;
|
||||||
|
|
||||||
#if defined(CUDA)
|
#if defined(CUDA)
|
||||||
@ -184,6 +176,9 @@ void run()
|
|||||||
|
|
||||||
stream->init_arrays(startA, startB, startC);
|
stream->init_arrays(startA, startB, startC);
|
||||||
|
|
||||||
|
// Result of the Dot kernel
|
||||||
|
T sum;
|
||||||
|
|
||||||
// List of times
|
// List of times
|
||||||
std::vector<std::vector<double>> timings(5);
|
std::vector<std::vector<double>> timings(5);
|
||||||
|
|
||||||
@ -226,6 +221,11 @@ void run()
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check solutions
|
// Check solutions
|
||||||
|
// Create host vectors
|
||||||
|
std::vector<T> a(ARRAY_SIZE);
|
||||||
|
std::vector<T> b(ARRAY_SIZE);
|
||||||
|
std::vector<T> c(ARRAY_SIZE);
|
||||||
|
|
||||||
stream->read_arrays(a, b, c);
|
stream->read_arrays(a, b, c);
|
||||||
check_solution<T>(num_times, a, b, c, sum);
|
check_solution<T>(num_times, a, b, c, sum);
|
||||||
|
|
||||||
@ -338,11 +338,6 @@ void run_triad()
|
|||||||
std::cout.precision(ss);
|
std::cout.precision(ss);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create host vectors
|
|
||||||
std::vector<T> a(ARRAY_SIZE);
|
|
||||||
std::vector<T> b(ARRAY_SIZE);
|
|
||||||
std::vector<T> c(ARRAY_SIZE);
|
|
||||||
|
|
||||||
Stream<T> *stream;
|
Stream<T> *stream;
|
||||||
|
|
||||||
#if defined(CUDA)
|
#if defined(CUDA)
|
||||||
@ -399,7 +394,13 @@ void run_triad()
|
|||||||
double runtime = std::chrono::duration_cast<std::chrono::duration<double> >(t2 - t1).count();
|
double runtime = std::chrono::duration_cast<std::chrono::duration<double> >(t2 - t1).count();
|
||||||
|
|
||||||
// Check solutions
|
// Check solutions
|
||||||
|
// Create host vectors
|
||||||
|
std::vector<T> a(ARRAY_SIZE);
|
||||||
|
std::vector<T> b(ARRAY_SIZE);
|
||||||
|
std::vector<T> c(ARRAY_SIZE);
|
||||||
|
|
||||||
T sum = 0.0;
|
T sum = 0.0;
|
||||||
|
|
||||||
stream->read_arrays(a, b, c);
|
stream->read_arrays(a, b, c);
|
||||||
check_solution<T>(num_times, a, b, c, sum);
|
check_solution<T>(num_times, a, b, c, sum);
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user