diff --git a/CHANGELOG.md b/CHANGELOG.md index 2958793..cc135f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ All notable changes to this project will be documented in this file. ## Unreleased - Fix CUDA memory limit check. +- Use long double for `check_solution` in case of large problem size. ## [v4.0] - 2021-12-22 diff --git a/src/main.cpp b/src/main.cpp index 3035da0..c9d7694 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -487,15 +487,15 @@ void check_solution(const unsigned int ntimes, std::vector& a, std::vector goldSum = goldA * goldB * ARRAY_SIZE; // Calculate the average error - double errA = std::accumulate(a.begin(), a.end(), 0.0, [&](double sum, const T val){ return sum + fabs(val - goldA); }); + long double errA = std::accumulate(a.begin(), a.end(), 0.0, [&](double sum, const T val){ return sum + fabs(val - goldA); }); errA /= a.size(); - double errB = std::accumulate(b.begin(), b.end(), 0.0, [&](double sum, const T val){ return sum + fabs(val - goldB); }); + long double errB = std::accumulate(b.begin(), b.end(), 0.0, [&](double sum, const T val){ return sum + fabs(val - goldB); }); errB /= b.size(); - double errC = std::accumulate(c.begin(), c.end(), 0.0, [&](double sum, const T val){ return sum + fabs(val - goldC); }); + long double errC = std::accumulate(c.begin(), c.end(), 0.0, [&](double sum, const T val){ return sum + fabs(val - goldC); }); errC /= c.size(); - double errSum = fabs((sum - goldSum)/goldSum); + long double errSum = fabs((sum - goldSum)/goldSum); - double epsi = std::numeric_limits::epsilon() * 100.0; + long double epsi = std::numeric_limits::epsilon() * 100.0; if (errA > epsi) std::cerr